Sauerkraut study - Regression-based Screening on functional data (on genes, not pathways)

Code
library(sauerkrautTaxonomyBuddy)
library(SummarizedExperiment) # microbiome analysis
library(mia)                  # microbiome analysis

library(readxl)     # read Excel files
library(mgcv)       # regression model
library(NBZIMM)     # zero-inflated Gaussian mixed regression models
library(parallel)   # parallelize processes on multiple cores

library(dplyr)      # data handling
library(tidyr)      # data transformation
library(ggplot2)    # data visualization
library(patchwork)  # grid of ggplot2
library(ggpubr)     # further ggplot2 grid customizations
library(kableExtra) # print tables

# set ggplot2 theme
theme_set(
  theme_minimal() +
    theme(plot.title       = element_text(hjust = 0.5),
          plot.subtitle    = element_text(hjust = 0.5),
          panel.grid.minor = element_blank(),
          plot.background  = element_rect(fill = "white", color = "white"))
)

# suppress scientific notation (= e notation of decimal numbers)
options(scipen = 999)
Code
# read the Excel file from Till Robin, mapping individual genes to specific SCFAs
path_SCFAgenes <- "9_lookup_SCFA-KEGG-KO.xlsx"
dat_scfaLookup <- read_excel(path_SCFAgenes, sheet = 1) %>% dplyr::select(SCFA, KO)

Data preparation

Code
define_dataPaths()
Code
# KO data
tse_KO <- readAndPrepare_mainFunctionalKOdata(
  path_KEGG_KO                      = path_KEGGKO_funData,
  path_KEGG_pathways                = path_KEGGPathways_funData,
  path_groupInfoRdata               = path_groupInfoRdata,
  path_participantRdata             = path_participantRdata,
  path_samplesIDxlsx                = path_samplesIDxlsx,
  path_dietInfoCsv                  = path_dietInfoCsv,
  path_nutrientInfoCsv              = path_nutrientInfoCsv,
  path_bloodMetabolomeXlsx          = path_bloodMetabolomeXlsx,
  path_stoolMetabolomeXlsx          = path_stoolMetabolomeXlsx,
  path_T4questXlsx                  = path_T4questXlsx,
  path_stoolInfoXlsx                = path_stoolInfoXlsx,
  path_bodyMeasuresXlsx             = path_bodyMeasuresXlsx,
  path_sampleLookupXlsx             = path_sampleLookupXlsx,
  path_krautLookupXlsx              = path_krautLookupXlsx,
  path_bloodMarkerBostonRdata       = path_bloodMarkerBostonRdata,
  path_bloodMarkerZentrallaborRdata = path_bloodMarkerZentrallaborRdata,
  path_bloodMarkerBostonXlsx        = path_bloodMarkerBostonXlsx,
  path_bloodMarkerZentrallaborXlsx  = path_bloodMarkerZentrallaborXlsx,
  exclude_sickParticipants          = "taxonomy paper"
)

# pathway data
tse_PW <- readAndPrepare_mainFunctionalPathwayData(
  path_KEGG_KO                      = path_KEGGKO_funData,
  path_KEGG_pathways                = path_KEGGPathways_funData,
  path_groupInfoRdata               = path_groupInfoRdata,
  path_participantRdata             = path_participantRdata,
  path_samplesIDxlsx                = path_samplesIDxlsx,
  path_dietInfoCsv                  = path_dietInfoCsv,
  path_nutrientInfoCsv              = path_nutrientInfoCsv,
  path_bloodMetabolomeXlsx          = path_bloodMetabolomeXlsx,
  path_stoolMetabolomeXlsx          = path_stoolMetabolomeXlsx,
  path_T4questXlsx                  = path_T4questXlsx,
  path_stoolInfoXlsx                = path_stoolInfoXlsx,
  path_bodyMeasuresXlsx             = path_bodyMeasuresXlsx,
  path_sampleLookupXlsx             = path_sampleLookupXlsx,
  path_krautLookupXlsx              = path_krautLookupXlsx,
  path_bloodMarkerBostonRdata       = path_bloodMarkerBostonRdata,
  path_bloodMarkerZentrallaborRdata = path_bloodMarkerZentrallaborRdata,
  path_bloodMarkerBostonXlsx        = path_bloodMarkerBostonXlsx,
  path_bloodMarkerZentrallaborXlsx  = path_bloodMarkerZentrallaborXlsx,
  exclude_sickParticipants          = "taxonomy paper"
)

# create joint tables with all relevant information
dat_KO <- tse_KO %>% mia::meltAssay(assay.type = "tpmAbd", add_row_data = TRUE, add_col_data = TRUE)
dat_PW <- tse_PW %>% mia::meltAssay(assay.type = "tpmAbd", add_row_data = TRUE, add_col_data = TRUE)
Code
# prepare data for regression models
dat_KO <- dat_KO %>% 
  filter(intervention != "FollowUp") %>% 
  mutate(tpmAbd_log     = log(tpmAbd + 1),
         intervention   = case_when(treatment == "Baseline" ~ "Baseline",
                                    TRUE                    ~ intervention),
         age_centered50 = age - 50,
         bmi_centered25 = bmi_t0 - 25,
         age_cat        = case_when(age < 50 ~ paste0(min(age), "-49"),
                                    TRUE     ~ paste0("50-", max(age))),
         age_cat        = factor(age_cat),
         bmi_cat        = case_when(bmi_t0 < 25 ~ paste0("[", floor(min(bmi_t0)), ", 25)"),
                                    TRUE        ~ paste0("[25, ", ceiling(max(bmi_t0)), "]")),
         bmi_cat        = factor(bmi_cat),
         phase          = case_when(timepoint %in% c("T3","T4") ~ "phase 2",
                                    TRUE                        ~ "phase 1"),
         phase          = factor(phase))

dat_PW <- dat_PW %>% 
  filter(intervention != "FollowUp") %>% 
  mutate(tpmAbd_log     = log(tpmAbd + 1),
         intervention   = case_when(treatment == "Baseline" ~ "Baseline",
                                    TRUE                    ~ intervention),
         age_centered50 = age - 50,
         bmi_centered25 = bmi_t0 - 25,
         age_cat        = case_when(age < 50 ~ paste0(min(age), "-49"),
                                    TRUE     ~ paste0("50-", max(age))),
         age_cat        = factor(age_cat),
         bmi_cat        = case_when(bmi_t0 < 25 ~ paste0("[", floor(min(bmi_t0)), ", 25)"),
                                    TRUE        ~ paste0("[25, ", ceiling(max(bmi_t0)), "]")),
         bmi_cat        = factor(bmi_cat),
         phase          = case_when(timepoint %in% c("T3","T4") ~ "phase 2",
                                    TRUE                        ~ "phase 1"),
         phase          = factor(phase))

Limit pathway data to categories Metabolism, Genetic Information Processing and Cellular Processes.

Code
dat_PW <- dat_PW %>% 
  filter(taxonomy_level1 %in% c("Metabolism", "Genetic Information Processing", "Cellular Processes"))

Check how many genes / pathways were unmapped

Code
datList_relFun <- readAndPrepare_functionalData(path_KEGG_KO             = path_KEGGKO_funData,
                                                path_KEGG_pathways       = path_KEGGPathways_funData,
                                                path_sampleLookupXlsx    = path_sampleLookupXlsx,
                                                path_krautLookupXlsx     = path_krautLookupXlsx,
                                                abundance_unit           = "relAbd",
                                                exclude_sickParticipants = "taxonomy paper")

datCheck_KO <- datList_relFun$datKO_stoolSamples
datCheck_PW <- datList_relFun$datPathways_stoolSamples

avgUnmapped_KO <- datCheck_KO %>% 
  filter(KO == "unknown") %>% 
  select(-KO) %>% 
  unlist() %>% 
  mean()
relAbdMapped_PW <- datCheck_PW %>% 
  select(starts_with("sample_")) %>% 
  colSums()
avgUnmapped_PW <- mean(1 - relAbdMapped_PW)

Regarding the KO gene data, on average 75.6% of genes were unmapped over all samples.
Regarding the pathway data, on average 99.4% of pathways were unmapped over all samples.

Run screening

Run screening on 7123 genes and 164 pathways.

Code
lowAbundanceGenes <- dat_KO %>% 
  mutate(relevantAbundance = (tpmAbd >= 10)) %>% 
  group_by(FeatureID) %>% 
  dplyr::summarize(lowTPMabundance = (sum(relevantAbundance) < 0.1*n())) %>% 
  filter(lowTPMabundance) %>% 
  pull(FeatureID) %>% 
  as.character()

dat_KO <- dat_KO %>% 
  filter(!(FeatureID %in% lowAbundanceGenes))


lowAbundancePathways <- dat_PW %>% 
  mutate(relevantAbundance = (tpmAbd >= 10)) %>% 
  group_by(taxonomy_level3) %>% 
  dplyr::summarize(lowTPMabundance = (sum(relevantAbundance) < 0.1*n())) %>% 
  filter(lowTPMabundance) %>% 
  pull(taxonomy_level3) %>% 
  as.character()

dat_PW <- dat_PW %>% 
  filter(!(taxonomy_level3 %in% lowAbundancePathways))

But before, filtering out 4338 genes and 64 pathways with low TPM abundances (i.e., all that don’t have a minimum abundance of 10 TPM in at least 10% of all samples). Running the screening on the remaining 2785 genes and 100 pathways.

For each gene and pathway, a Generalized Mixed Regression Model is estimated, identical to the models estimated for the blood measurements for the blood paper, with response parameter the TPM abundance.

Model selection is performed by estimating either estimating (A) a Gaussian model, and (B) a Gaussian model on ‘log(y + 1)’ transformed values, or (A) a zero-inflated Gaussian model (with only an intercept predictor for the ZI part, as a general excess zero correction), and (B) a zero-inflated Gaussian model on ‘log(y + 1)’ transformed values for each gene and selecting the best performing one of the respective two as the final model. If at least 10% of a gene’s observations are zero values then the zero-inflated models are estimated, otherwise the non-zero-inflated models are estimated.

Notes:

  • For the non-zero-inflated models the best model is chosen based on the share of explained deviance
  • For the zero-inflated models the best model is chosen based on the lower average of all absolute residuals, since these models are estimated based on a quasi-likelihood approach and the deviance is not available.

Unused alternatives

  • a zero-inflated Poisson model could have been estimated on rounded values. However, a Poisson distribution is not able to account for more strongly right-skewed distributions with a stronger probability mass at zero
  • a Tweedie (p = 1.5) distribution would be a right-skewed continuous distribution with also potential probability mass at zero. However, since (1) it’s not a proper established distribution and (2) the used log transformation already accounts for right-skewedness we don’t need it and don’t use it

Correction for multiple testing is performed by applying the Benjamini-Hochberg (FDR) method to all interventions’ p-values.

Additionally, the intervention effects for every model are estimated in separate interactions with gender (male or female), age (above or below 50), baseline BMI (above or below 25). Independently for every interaction variable (age, sex, BMI) a multiple testing correction is applied.

Note: For these screening analyses we always use a significance threshold of 0.1 instead of 0.05.

The models are estimated with mgcv::gam() and NBZIMM::lme.zig().

Code
# helper function to estimate the models
# - dat_model:     Dataset used for model estimation
# - model_type:    One of c("Gaussian", "ZI Gaussian")
# - model_formula: A model formula created with "as.formula"
estimate_model <- function(dat_model, model_type, model_formula) {
  
  checkmate::assert_data_frame(dat_model)
  checkmate::assert_choice(model_type, choices = c("Gaussian", "ZI Gaussian"))
  checkmate::assert_class(model_formula, classes = "formula")
  
  
  if (model_type == "Gaussian") {
    model <- gam(model_formula, method = "REML", family = "gaussian", data = dat_model)
    
  } else { # model_type = "ZI Gaussian"
    model <- tryCatch({ lme.zig(fixed     = model_formula,
                                zi_fixed  = ~ 1,
                                random    = ~ 1 | participant_id,
                                zi_random = NULL,
                                data      = dat_model) },
                      error = function(e) { return(NULL) })
  }
  
  return(model)
}
Code
# helper function to estimate the interaction models.
# Returns a list with all three estimated models and the respective three summary tables.
# Argument 'selected_model' is the label of the previously selected model type in the model selection process.
estimate_interactionModels <- function(dat_model, selected_model) {
  
  if (selected_model == "ZI Gaussian") {
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:age_cat"))
    if (is.null(model_ageInt)) { # if model estimation failed
      model_ageInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    }
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:gender"))
    if (is.null(model_sexInt)) { # if model estimation failed
      model_sexInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    }
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:bmi_cat"))
    if (is.null(model_bmiInt)) { # if model estimation failed
      model_bmiInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    }
    
  } else if (selected_model == "ZI log-Gaussian") {
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:age_cat"))
    if (is.null(model_ageInt)) { # if model estimation failed
      model_ageInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    }
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:gender"))
    if (is.null(model_sexInt)) { # if model estimation failed
      model_sexInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    }
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "ZI Gaussian",
                                   model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + intervention:bmi_cat"))
    if (is.null(model_bmiInt)) { # if model estimation failed
      model_bmiInt <- estimate_model(dat_model     = dat_model,
                                     model_type    = "Gaussian",
                                     model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    }
    
  } else if (selected_model == "Gaussian") {
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
    
  } else { # model_label == "log-Gaussian"
    # age interaction model
    model_ageInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:age_cat"))
    # sex interaction model
    model_sexInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:gender"))
    # BMI interaction model
    model_bmiInt <- estimate_model(dat_model     = dat_model,
                                   model_type    = "Gaussian",
                                   model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're') + intervention:bmi_cat"))
  }
  
  
  ### prepare results
  # age interaction model
  if (class(model_ageInt)[1] == "zigmm") {
    tab_ageInt           <- summary(model_ageInt)$tTable
    colnames(tab_ageInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_ageInt           <- summary(model_ageInt)$p.table
  }
  # sex interaction model
  if (class(model_sexInt)[1] == "zigmm") {
    tab_sexInt           <- summary(model_sexInt)$tTable
    colnames(tab_sexInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_sexInt           <- summary(model_sexInt)$p.table
  }
  # BMI interaction model
  if (class(model_bmiInt)[1] == "zigmm") {
    tab_bmiInt           <- summary(model_bmiInt)$tTable
    colnames(tab_bmiInt) <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
  } else { # non-ZI model
    tab_bmiInt           <- summary(model_bmiInt)$p.table
  }

  
  list(model_ageInt = model_ageInt,
       model_sexInt = model_sexInt,
       model_bmiInt = model_bmiInt,
       tab_ageInt   = tab_ageInt,
       tab_sexInt   = tab_sexInt,
       tab_bmiInt   = tab_bmiInt)
}
Code
# main screening function for genes
runScreening_oneGene <- function(gene) {
  
  message("Process gene ", gene, " (", match(gene, genes), "/", length(genes), ")")
  
  dat_gene <- dat_KO[dat_KO$FeatureID == gene,]
  
  # 1) main model estimation, based on model selection
  estimate_ZImodels <- sum(dat_gene$tpmAbd == 0) >= (0.1*nrow(dat_gene))
  if (estimate_ZImodels) { # estimate zero-inflated models
    
    # these zero-inflated models sometimes run into an estimation error. In this case, ignore the respective model.
    # If both models run into an estimation error, simply estimate the non-zero-inflated models instead.
    model_list <- list("ZI Gaussian"     = estimate_model(dat_model     = dat_gene,
                                                          model_type    = "ZI Gaussian",
                                                          model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25")),
                       "ZI log-Gaussian" = estimate_model(dat_model     = dat_gene,
                                                          model_type    = "ZI Gaussian",
                                                          model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25")))
    
    fallBack_toNonZImodels <- ifelse(is.null(model_list[[1]]) & is.null(model_list[[2]]), TRUE, FALSE)
    
    if (!fallBack_toNonZImodels) {
      
      if (is.null(model_list[[1]]) | is.null(model_list[[2]])) {
        null_index <- unname(which(sapply(model_list, function(x) { is.null(x) })))
        model_list <- model_list[-null_index]
      }
      
      bestModel_index <- unname(which.min(sapply(model_list, function(x) { mean(abs(summary(x)$residuals)) })))
      model           <- model_list[[bestModel_index]]
      model_label     <- names(model_list)[bestModel_index]
      tab             <- summary(model)$tTable
      colnames(tab)   <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
    }
    
  } else {
    fallBack_toNonZImodels <- FALSE # simply to have this object defined also if ZI models are not estimated
  }
  
  if (!estimate_ZImodels || fallBack_toNonZImodels) { # estimate models without zero-inflation
    
    model_list <- list("Gaussian"     = estimate_model(dat_model     = dat_gene,
                                                       model_type    = "Gaussian",
                                                       model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're')")),
                       "log-Gaussian" = estimate_model(dat_model     = dat_gene,
                                                       model_type    = "Gaussian",
                                                       model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're')")))
    
    bestModel_index <- unname(which.max(sapply(model_list, function(x) { summary(x)$dev.expl })))
    model           <- model_list[[bestModel_index]]
    model_label     <- names(model_list)[bestModel_index]
    tab             <- summary(model)$p.table
    
  }
  
  
  # 2) interaction model estimation, only for the final selected model
  # model estimation with the lower category of the interaction variable as the reference category
  model_intList <- estimate_interactionModels(dat_model      = dat_gene,
                                              selected_model = model_label)
  # model estimation with the higher category of the interaction variable as the reference category
  dat_gene_changedRef         <- dat_gene
  dat_gene_changedRef$gender  <- relevel(dat_gene_changedRef$gender,  ref = "W")
  dat_gene_changedRef$age_cat <- relevel(dat_gene_changedRef$age_cat, ref = "50-69")
  dat_gene_changedRef$bmi_cat <- relevel(dat_gene_changedRef$bmi_cat, ref = "[25, 31]")
  model_intList_changedRef  <- estimate_interactionModels(dat_model      = dat_gene_changedRef,
                                                          selected_model = model_label)
  
  # create joint table
  tab_intEffects <- data.frame(effect_type    = c("fresh intervention", "pasteurized intervention"),
                               effect_ageLow  = c(model_intList$tab_ageInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Estimate"]),
                               effect_ageHigh = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Estimate"]),
                               se_ageLow      = c(model_intList$tab_ageInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Std. Error"]),
                               se_ageHigh     = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Std. Error"]),
                               pvalue_ageLow  = c(model_intList$tab_ageInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_ageHigh = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_sexM    = c(model_intList$tab_sexInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Estimate"]),
                               effect_sexW    = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Estimate"]),
                               se_sexM        = c(model_intList$tab_sexInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Std. Error"]),
                               se_sexW        = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Std. Error"]),
                               pvalue_sexM    = c(model_intList$tab_sexInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_sexW    = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_bmiLow  = c(model_intList$tab_bmiInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Estimate"]),
                               effect_bmiHigh = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Estimate"]),
                               se_bmiLow      = c(model_intList$tab_bmiInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Std. Error"]),
                               se_bmiHigh     = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Std. Error"]),
                               pvalue_bmiLow  = c(model_intList$tab_bmiInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_bmiHigh = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Pr(>|t|)"]))
  
  
  # 3) gather results
  data.frame(gene               = gene,
             relFreq_zeroTPM    = sum(dat_gene$tpmAbd == 0) / nrow(dat_gene),
             relFreq_min10TPM   = sum(dat_gene$tpmAbd >= 10) / nrow(dat_gene),
             min_tpmAbd         = min(dat_gene$tpmAbd),
             mean_tpmAbd        = mean(dat_gene$tpmAbd),
             median_tpmAbd      = median(dat_gene$tpmAbd),
             max_tpmAbd         = max(dat_gene$tpmAbd),
             baseline_medianTPMAbd = median(dat_gene$tpmAbd[dat_gene$inter_treat %in% c("Baseline.Fresh","Baseline.Pasteurized")]),
             baseline_prevalence     = unname(prop.table(table(dat_gene$tpmAbd[dat_gene$inter_treat %in% c("Baseline.Fresh","Baseline.Pasteurized")] > 10))["TRUE"]),
             postFresh_medianTPMAbd  = median(dat_gene$tpmAbd[dat_gene$inter_treat == "After.Fresh"]),
             postFresh_prevalence    = unname(prop.table(table(dat_gene$tpmAbd[dat_gene$inter_treat == "After.Fresh"] > 10))["TRUE"]),
             postPast_medianTPMAbd   = median(dat_gene$tpmAbd[dat_gene$inter_treat == "After.Pasteurized"]),
             postPast_prevalence     = unname(prop.table(table(dat_gene$tpmAbd[dat_gene$inter_treat == "After.Pasteurized"] > 10))["TRUE"]),
             model              = model_label,
             model_deviance     = ifelse(!grepl("ZI", model_label), summary(model)$dev.expl, NA),
             model_meanAbsResid = ifelse(grepl("ZI", model_label), mean(abs(summary(model)$residuals)), NA),
             freshInt_effect    = tab["interventionFresh", "Estimate"],
             freshInt_se        = tab["interventionFresh", "Std. Error"],
             freshInt_pvalue    = tab["interventionFresh", "Pr(>|t|)"],
             pastInt_effect     = tab["interventionPasteurized", "Estimate"],
             pastInt_se         = tab["interventionPasteurized", "Std. Error"],
             pastInt_pvalue     = tab["interventionPasteurized", "Pr(>|t|)"],
             phaseEffect        = tab["phasephase 2", "Estimate"],
             genderEffect       = tab["genderW", "Estimate"],
             ageEffect          = tab["age_centered50", "Estimate"],
             bmiEffect          = tab["bmi_centered25", "Estimate"],
             freshInt_ageLow_effect  = tab_intEffects$effect_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_effect = tab_intEffects$effect_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageLow_se      = tab_intEffects$se_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_se     = tab_intEffects$se_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageLow_pvalue  = tab_intEffects$pvalue_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_pvalue = tab_intEffects$pvalue_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_effect    = tab_intEffects$effect_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_effect    = tab_intEffects$effect_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_se        = tab_intEffects$se_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_se        = tab_intEffects$se_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_pvalue    = tab_intEffects$pvalue_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_pvalue    = tab_intEffects$pvalue_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_effect  = tab_intEffects$effect_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_effect = tab_intEffects$effect_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_se      = tab_intEffects$se_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_se     = tab_intEffects$se_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_pvalue  = tab_intEffects$pvalue_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_pvalue = tab_intEffects$pvalue_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             pastInt_ageLow_effect   = tab_intEffects$effect_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_effect  = tab_intEffects$effect_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageLow_se       = tab_intEffects$se_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_se      = tab_intEffects$se_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageLow_pvalue   = tab_intEffects$pvalue_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_pvalue  = tab_intEffects$pvalue_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_effect     = tab_intEffects$effect_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_effect     = tab_intEffects$effect_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_se         = tab_intEffects$se_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_se         = tab_intEffects$se_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_pvalue     = tab_intEffects$pvalue_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_pvalue     = tab_intEffects$pvalue_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_effect   = tab_intEffects$effect_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_effect  = tab_intEffects$effect_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_se       = tab_intEffects$se_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_se      = tab_intEffects$se_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_pvalue   = tab_intEffects$pvalue_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_pvalue  = tab_intEffects$pvalue_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"])
}


# main screening function for pathways
runScreening_onePathway <- function(pathway) {
  
  message("Process pathway ", pathway, " (", match(pathway, pathways), "/", length(pathways), ")")
  
  dat_pathway <- dat_PW[dat_PW$taxonomy_level3 == pathway,]
  
  # 1) main model estimation, based on model selection
  estimate_ZImodels <- sum(dat_pathway$tpmAbd == 0) >= (0.1*nrow(dat_pathway))
  if (estimate_ZImodels) { # estimate zero-inflated models
    
    # these zero-inflated models sometimes run into an estimation error. In this case, ignore the respective model.
    # If both models run into an estimation error, simply estimate the non-zero-inflated models instead.
    model_list <- list("ZI Gaussian"     = estimate_model(dat_model     = dat_pathway,
                                                          model_type    = "ZI Gaussian",
                                                          model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25")),
                       "ZI log-Gaussian" = estimate_model(dat_model     = dat_pathway,
                                                          model_type    = "ZI Gaussian",
                                                          model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25")))
    
    fallBack_toNonZImodels <- ifelse(is.null(model_list[[1]]) & is.null(model_list[[2]]), TRUE, FALSE)
    
    if (!fallBack_toNonZImodels) {
      
      if (is.null(model_list[[1]]) | is.null(model_list[[2]])) {
        null_index <- unname(which(sapply(model_list, function(x) { is.null(x) })))
        model_list <- model_list[-null_index]
      }
      
      bestModel_index <- unname(which.min(sapply(model_list, function(x) { mean(abs(summary(x)$residuals)) })))
      model           <- model_list[[bestModel_index]]
      model_label     <- names(model_list)[bestModel_index]
      tab             <- summary(model)$tTable
      colnames(tab)   <- c("Estimate", "Std. Error", "DF", "t-value", "Pr(>|t|)") # make names consistent to gam tab names
    }
    
  } else {
    fallBack_toNonZImodels <- FALSE # simply to have this object defined also if ZI models are not estimated
  }
  
  if (!estimate_ZImodels || fallBack_toNonZImodels) { # estimate models without zero-inflation
    
    model_list <- list("Gaussian"     = estimate_model(dat_model     = dat_pathway,
                                                       model_type    = "Gaussian",
                                                       model_formula = as.formula("tpmAbd ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're')")),
                       "log-Gaussian" = estimate_model(dat_model     = dat_pathway,
                                                       model_type    = "Gaussian",
                                                       model_formula = as.formula("tpmAbd_log ~ intervention + phase + gender + age_centered50 + bmi_centered25 + s(participant_id, bs = 're')")))
    
    bestModel_index <- unname(which.max(sapply(model_list, function(x) { summary(x)$dev.expl })))
    model           <- model_list[[bestModel_index]]
    model_label     <- names(model_list)[bestModel_index]
    tab             <- summary(model)$p.table
    
  }
  
  
  # 2) interaction model estimation, only for the final selected model
  # model estimation with the lower category of the interaction variable as the reference category
  model_intList <- estimate_interactionModels(dat_model      = dat_pathway,
                                              selected_model = model_label)
  # model estimation with the higher category of the interaction variable as the reference category
  dat_pathway_changedRef         <- dat_pathway
  dat_pathway_changedRef$gender  <- relevel(dat_pathway_changedRef$gender,  ref = "W")
  dat_pathway_changedRef$age_cat <- relevel(dat_pathway_changedRef$age_cat, ref = "50-69")
  dat_pathway_changedRef$bmi_cat <- relevel(dat_pathway_changedRef$bmi_cat, ref = "[25, 31]")
  model_intList_changedRef  <- estimate_interactionModels(dat_model      = dat_pathway_changedRef,
                                                          selected_model = model_label)
  
  # create joint table
  tab_intEffects <- data.frame(effect_type    = c("fresh intervention", "pasteurized intervention"),
                               effect_ageLow  = c(model_intList$tab_ageInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Estimate"]),
                               effect_ageHigh = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Estimate"]),
                               se_ageLow      = c(model_intList$tab_ageInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Std. Error"]),
                               se_ageHigh     = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Std. Error"]),
                               pvalue_ageLow  = c(model_intList$tab_ageInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_ageInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_ageHigh = c(model_intList_changedRef$tab_ageInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_ageInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_sexM    = c(model_intList$tab_sexInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Estimate"]),
                               effect_sexW    = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Estimate"]),
                               se_sexM        = c(model_intList$tab_sexInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Std. Error"]),
                               se_sexW        = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Std. Error"]),
                               pvalue_sexM    = c(model_intList$tab_sexInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_sexInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_sexW    = c(model_intList_changedRef$tab_sexInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_sexInt["interventionPasteurized", "Pr(>|t|)"]),
                               effect_bmiLow  = c(model_intList$tab_bmiInt["interventionFresh", "Estimate"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Estimate"]),
                               effect_bmiHigh = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Estimate"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Estimate"]),
                               se_bmiLow      = c(model_intList$tab_bmiInt["interventionFresh", "Std. Error"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Std. Error"]),
                               se_bmiHigh     = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Std. Error"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Std. Error"]),
                               pvalue_bmiLow  = c(model_intList$tab_bmiInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList$tab_bmiInt["interventionPasteurized", "Pr(>|t|)"]),
                               pvalue_bmiHigh = c(model_intList_changedRef$tab_bmiInt["interventionFresh", "Pr(>|t|)"],
                                                  model_intList_changedRef$tab_bmiInt["interventionPasteurized", "Pr(>|t|)"]))
  
  
  # 3) gather results
  data.frame(pathway            = pathway,
             relFreq_zeroTPM    = sum(dat_pathway$tpmAbd == 0) / nrow(dat_pathway),
             relFreq_min10TPM   = sum(dat_pathway$tpmAbd >= 10) / nrow(dat_pathway),
             min_tpmAbd         = min(dat_pathway$tpmAbd),
             mean_tpmAbd        = mean(dat_pathway$tpmAbd),
             median_tpmAbd      = median(dat_pathway$tpmAbd),
             max_tpmAbd         = max(dat_pathway$tpmAbd),
             baseline_medianTPMAbd = median(dat_pathway$tpmAbd[dat_pathway$inter_treat %in% c("Baseline.Fresh","Baseline.Pasteurized")]),
             baseline_prevalence     = unname(prop.table(table(dat_pathway$tpmAbd[dat_pathway$inter_treat %in% c("Baseline.Fresh","Baseline.Pasteurized")] > 10))["TRUE"]),
             postFresh_medianTPMAbd  = median(dat_pathway$tpmAbd[dat_pathway$inter_treat == "After.Fresh"]),
             postFresh_prevalence    = unname(prop.table(table(dat_pathway$tpmAbd[dat_pathway$inter_treat == "After.Fresh"] > 10))["TRUE"]),
             postPast_medianTPMAbd   = median(dat_pathway$tpmAbd[dat_pathway$inter_treat == "After.Pasteurized"]),
             postPast_prevalence     = unname(prop.table(table(dat_pathway$tpmAbd[dat_pathway$inter_treat == "After.Pasteurized"] > 10))["TRUE"]),
             model              = model_label,
             model_deviance     = ifelse(!grepl("ZI", model_label), summary(model)$dev.expl, NA),
             model_meanAbsResid = ifelse(grepl("ZI", model_label), mean(abs(summary(model)$residuals)), NA),
             freshInt_effect    = tab["interventionFresh", "Estimate"],
             freshInt_se        = tab["interventionFresh", "Std. Error"],
             freshInt_pvalue    = tab["interventionFresh", "Pr(>|t|)"],
             pastInt_effect     = tab["interventionPasteurized", "Estimate"],
             pastInt_se         = tab["interventionPasteurized", "Std. Error"],
             pastInt_pvalue     = tab["interventionPasteurized", "Pr(>|t|)"],
             phaseEffect        = tab["phasephase 2", "Estimate"],
             genderEffect       = tab["genderW", "Estimate"],
             ageEffect          = tab["age_centered50", "Estimate"],
             bmiEffect          = tab["bmi_centered25", "Estimate"],
             freshInt_ageLow_effect  = tab_intEffects$effect_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_effect = tab_intEffects$effect_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageLow_se      = tab_intEffects$se_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_se     = tab_intEffects$se_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageLow_pvalue  = tab_intEffects$pvalue_ageLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_ageHigh_pvalue = tab_intEffects$pvalue_ageHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_effect    = tab_intEffects$effect_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_effect    = tab_intEffects$effect_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_se        = tab_intEffects$se_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_se        = tab_intEffects$se_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexM_pvalue    = tab_intEffects$pvalue_sexM[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_sexW_pvalue    = tab_intEffects$pvalue_sexW[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_effect  = tab_intEffects$effect_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_effect = tab_intEffects$effect_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_se      = tab_intEffects$se_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_se     = tab_intEffects$se_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiLow_pvalue  = tab_intEffects$pvalue_bmiLow[tab_intEffects$effect_type == "fresh intervention"],
             freshInt_bmiHigh_pvalue = tab_intEffects$pvalue_bmiHigh[tab_intEffects$effect_type == "fresh intervention"],
             pastInt_ageLow_effect   = tab_intEffects$effect_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_effect  = tab_intEffects$effect_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageLow_se       = tab_intEffects$se_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_se      = tab_intEffects$se_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageLow_pvalue   = tab_intEffects$pvalue_ageLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_ageHigh_pvalue  = tab_intEffects$pvalue_ageHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_effect     = tab_intEffects$effect_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_effect     = tab_intEffects$effect_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_se         = tab_intEffects$se_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_se         = tab_intEffects$se_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexM_pvalue     = tab_intEffects$pvalue_sexM[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_sexW_pvalue     = tab_intEffects$pvalue_sexW[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_effect   = tab_intEffects$effect_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_effect  = tab_intEffects$effect_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_se       = tab_intEffects$se_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_se      = tab_intEffects$se_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiLow_pvalue   = tab_intEffects$pvalue_bmiLow[tab_intEffects$effect_type == "pasteurized intervention"],
             pastInt_bmiHigh_pvalue  = tab_intEffects$pvalue_bmiHigh[tab_intEffects$effect_type == "pasteurized intervention"])
}
Code
# code for running the screening. It takes about 15 minutes on 15 cores
# to estimate the results. Instead of newly estimating them each time
# the html is generated, the results were saved after running the screening
# once, and are now just read from the results csv
dat_KOresults <- read.csv("9_genesAndPathways_regressionScreening_KOresults.csv")

# genes <- dat_KO %>% filter(FeatureID != "unknown") %>% pull(FeatureID) %>% unique() %>% as.character()
# 
# # number of cores to use for the parallelized call
# n_cores <- 15
# 
# # create a cluster object for Windows parallelization
# cl <- makeCluster(n_cores)
# 
# # export necessary functions / data objects to the cluster workers
# clusterExport(cl, c("runScreening_oneGene", "estimate_model", "estimate_interactionModels",
#                     "genes", "dat_KO", "gam", "lme.zig", "summary.gam"))
# 
# # run the screening (parallelized call)
# datList_KOresults <- parLapply(cl, genes, runScreening_oneGene)
# 
# # stop the Windows cluster
# stopCluster(cl)
# 
# # merge results datasets
# dat_KOresults <- datList_KOresults %>% dplyr::bind_rows()
# 
# # perform multiple testing correction and round values in the data
# p_corrected       <- p.adjust(c(dat_KOresults$freshInt_pvalue, dat_KOresults$pastInt_pvalue), method = "BH")
# pAgeInt_corrected <- p.adjust(c(dat_KOresults$freshInt_ageLow_pvalue,  dat_KOresults$pastInt_ageLow_pvalue,
#                                 dat_KOresults$freshInt_ageHigh_pvalue, dat_KOresults$pastInt_ageHigh_pvalue), method = "BH")
# pSexInt_corrected <- p.adjust(c(dat_KOresults$freshInt_sexM_pvalue,  dat_KOresults$pastInt_sexM_pvalue,
#                                 dat_KOresults$freshInt_sexW_pvalue, dat_KOresults$pastInt_sexW_pvalue), method = "BH")
# pBMIInt_corrected <- p.adjust(c(dat_KOresults$freshInt_bmiLow_pvalue,  dat_KOresults$pastInt_bmiLow_pvalue,
#                                 dat_KOresults$freshInt_bmiHigh_pvalue, dat_KOresults$pastInt_bmiHigh_pvalue), method = "BH")
# n             <- nrow(dat_KOresults)
# dat_KOresults <- dat_KOresults %>%
#   mutate(freshInt_pvalue_correct          = p_corrected[1:n],
#          pastInt_pvalue_correct           = p_corrected[(n+1):(2*n)],
#          freshInt_ageLow_pvalue_correct   = pAgeInt_corrected[1:n],
#          pastInt_ageLow_pvalue_correct    = pAgeInt_corrected[(n+1):(2*n)],
#          freshInt_ageHigh_pvalue_correct  = pAgeInt_corrected[(2*n+1):(3*n)],
#          pastInt_ageHigh_pvalue_correct   = pAgeInt_corrected[(3*n+1):(4*n)],
#          freshInt_sexM_pvalue_correct     = pSexInt_corrected[1:n],
#          pastInt_sexM_pvalue_correct      = pSexInt_corrected[(n+1):(2*n)],
#          freshInt_sexW_pvalue_correct     = pSexInt_corrected[(2*n+1):(3*n)],
#          pastInt_sexW_pvalue_correct      = pSexInt_corrected[(3*n+1):(4*n)],
#          freshInt_bmiLow_pvalue_correct   = pBMIInt_corrected[1:n],
#          pastInt_bmiLow_pvalue_correct    = pBMIInt_corrected[(n+1):(2*n)],
#          freshInt_bmiHigh_pvalue_correct  = pBMIInt_corrected[(2*n+1):(3*n)],
#          pastInt_bmiHigh_pvalue_correct   = pBMIInt_corrected[(3*n+1):(4*n)]) %>%
#   mutate_at(c(2:13, 15:ncol(.)), function(x) { round(x, 4) })
# 
# # add information if the genes relate to specific SCFAs
# dat_KOresults <- dat_KOresults %>%
#   dplyr::left_join(dat_scfaLookup, by = c("gene" = "KO")) %>%
#   dplyr::select(SCFA, everything())
# 
# write.csv(dat_KOresults, file = "9_genesAndPathways_regressionScreening_KOresults.csv", row.names = FALSE)
Code
# code for running the screening. It takes ~22 seconds on 15 cores
# to estimate the results. Instead of newly estimating them each time
# the html is generated, the results were saved after running the screening
# once, and are now just read from the results csv
dat_PWresults <- read.csv("9_genesAndPathways_regressionScreening_PWresults.csv")

# pathways <- dat_PW %>% pull(taxonomy_level3) %>% unique() %>% as.character()
# 
# # number of cores to use for the parallelized call
# n_cores <- 15
# 
# # create a cluster object for Windows parallelization
# cl <- makeCluster(n_cores)
# 
# # export necessary functions / data objects to the cluster workers
# clusterExport(cl, c("runScreening_onePathway", "estimate_model", "estimate_interactionModels",
#                     "pathways", "dat_PW", "gam", "lme.zig", "summary.gam"))
# 
# # run the screening (parallelized call)
# datList_PWresults <- parLapply(cl, pathways, runScreening_onePathway)
# 
# # stop the Windows cluster
# stopCluster(cl)
# 
# # merge results datasets
# dat_PWresults <- datList_PWresults %>% dplyr::bind_rows()
# 
# # perform multiple testing correction and round values in the data
# p_corrected       <- p.adjust(c(dat_PWresults$freshInt_pvalue, dat_PWresults$pastInt_pvalue), method = "BH")
# pAgeInt_corrected <- p.adjust(c(dat_PWresults$freshInt_ageLow_pvalue,  dat_PWresults$pastInt_ageLow_pvalue,
#                                 dat_PWresults$freshInt_ageHigh_pvalue, dat_PWresults$pastInt_ageHigh_pvalue), method = "BH")
# pSexInt_corrected <- p.adjust(c(dat_PWresults$freshInt_sexM_pvalue,  dat_PWresults$pastInt_sexM_pvalue,
#                                 dat_PWresults$freshInt_sexW_pvalue, dat_PWresults$pastInt_sexW_pvalue), method = "BH")
# pBMIInt_corrected <- p.adjust(c(dat_PWresults$freshInt_bmiLow_pvalue,  dat_PWresults$pastInt_bmiLow_pvalue,
#                                 dat_PWresults$freshInt_bmiHigh_pvalue, dat_PWresults$pastInt_bmiHigh_pvalue), method = "BH")
# n             <- nrow(dat_PWresults)
# dat_PWresults <- dat_PWresults %>%
#   mutate(freshInt_pvalue_correct          = p_corrected[1:n],
#          pastInt_pvalue_correct           = p_corrected[(n+1):(2*n)],
#          freshInt_ageLow_pvalue_correct   = pAgeInt_corrected[1:n],
#          pastInt_ageLow_pvalue_correct    = pAgeInt_corrected[(n+1):(2*n)],
#          freshInt_ageHigh_pvalue_correct  = pAgeInt_corrected[(2*n+1):(3*n)],
#          pastInt_ageHigh_pvalue_correct   = pAgeInt_corrected[(3*n+1):(4*n)],
#          freshInt_sexM_pvalue_correct     = pSexInt_corrected[1:n],
#          pastInt_sexM_pvalue_correct      = pSexInt_corrected[(n+1):(2*n)],
#          freshInt_sexW_pvalue_correct     = pSexInt_corrected[(2*n+1):(3*n)],
#          pastInt_sexW_pvalue_correct      = pSexInt_corrected[(3*n+1):(4*n)],
#          freshInt_bmiLow_pvalue_correct   = pBMIInt_corrected[1:n],
#          pastInt_bmiLow_pvalue_correct    = pBMIInt_corrected[(n+1):(2*n)],
#          freshInt_bmiHigh_pvalue_correct  = pBMIInt_corrected[(2*n+1):(3*n)],
#          pastInt_bmiHigh_pvalue_correct   = pBMIInt_corrected[(3*n+1):(4*n)]) %>%
#   mutate_at(c(2:13, 15:ncol(.)), function(x) { round(x, 4) })
# 
# write.csv(dat_PWresults, file = "9_genesAndPathways_regressionScreening_PWresults.csv", row.names = FALSE)

SCFA genes

Code
dat_SCFAresults <- dat_KOresults %>% filter(!is.na(SCFA))

Perform multiple testing correction of p-values only on the SCFA tests

Currently, this doesn’t change anything in the number of significant genes. Re-checked this, and everything is already. That’s just the result.

Code
p_corrected       <- p.adjust(c(dat_SCFAresults$freshInt_pvalue, dat_SCFAresults$pastInt_pvalue), method = "BH")
pAgeInt_corrected <- p.adjust(c(dat_SCFAresults$freshInt_ageLow_pvalue,  dat_SCFAresults$pastInt_ageLow_pvalue,
                                dat_SCFAresults$freshInt_ageHigh_pvalue, dat_SCFAresults$pastInt_ageHigh_pvalue), method = "BH")
pSexInt_corrected <- p.adjust(c(dat_SCFAresults$freshInt_sexM_pvalue,  dat_SCFAresults$pastInt_sexM_pvalue,
                                dat_SCFAresults$freshInt_sexW_pvalue, dat_SCFAresults$pastInt_sexW_pvalue), method = "BH")
pBMIInt_corrected <- p.adjust(c(dat_SCFAresults$freshInt_bmiLow_pvalue,  dat_SCFAresults$pastInt_bmiLow_pvalue,
                                dat_SCFAresults$freshInt_bmiHigh_pvalue, dat_SCFAresults$pastInt_bmiHigh_pvalue), method = "BH")
n               <- nrow(dat_SCFAresults)
dat_SCFAresults <- dat_SCFAresults %>%
  mutate(freshInt_pvalue_correct          = p_corrected[1:n] %>% round(4),
         pastInt_pvalue_correct           = p_corrected[(n+1):(2*n)] %>% round(4),
         freshInt_ageLow_pvalue_correct   = pAgeInt_corrected[1:n] %>% round(4),
         pastInt_ageLow_pvalue_correct    = pAgeInt_corrected[(n+1):(2*n)] %>% round(4),
         freshInt_ageHigh_pvalue_correct  = pAgeInt_corrected[(2*n+1):(3*n)] %>% round(4),
         pastInt_ageHigh_pvalue_correct   = pAgeInt_corrected[(3*n+1):(4*n)] %>% round(4),
         freshInt_sexM_pvalue_correct     = pSexInt_corrected[1:n] %>% round(4),
         pastInt_sexM_pvalue_correct      = pSexInt_corrected[(n+1):(2*n)] %>% round(4),
         freshInt_sexW_pvalue_correct     = pSexInt_corrected[(2*n+1):(3*n)] %>% round(4),
         pastInt_sexW_pvalue_correct      = pSexInt_corrected[(3*n+1):(4*n)] %>% round(4),
         freshInt_bmiLow_pvalue_correct   = pBMIInt_corrected[1:n] %>% round(4),
         pastInt_bmiLow_pvalue_correct    = pBMIInt_corrected[(n+1):(2*n)] %>% round(4),
         freshInt_bmiHigh_pvalue_correct  = pBMIInt_corrected[(2*n+1):(3*n)] %>% round(4),
         pastInt_bmiHigh_pvalue_correct   = pBMIInt_corrected[(3*n+1):(4*n)] %>% round(4)) %>% 
  mutate_at(c(3:14, 16:ncol(.)), function(x) { round(x, 4) })
Code
# some description
# dat_KO %>% 
#   dplyr::left_join(dat_scfaLookup, by = c("FeatureID" = "KO")) %>% 
#   filter(!is.na(SCFA),
#          intervention != "FollowUp") %>% 
#   group_by(FeatureID, timepoint) %>% 
#   dplyr::summarize(sampleShare_atLeast100TPM = sum(tpmAbd >= 100) / n(),
#             inter_treat               = first(inter_treat)) %>% 
#   group_by(FeatureID) %>% 
#   mutate(minY_forSorting = min(sampleShare_atLeast100TPM)) %>% 
#   ungroup() %>%
#   arrange(desc(minY_forSorting)) %>% 
#   mutate(FeatureID = factor(FeatureID, levels = unique(as.character(FeatureID)))) %>% 
#   ggplot(aes(y = FeatureID, x = inter_treat, fill = sampleShare_atLeast100TPM)) +
#   geom_raster()

SCFA results

Overview of the numbers of significant genes
Code
data.frame("what"  = c("No. of SCFA genes",
                       "... that didn't have low abundance",
                       "No. of significant genes (fresh intervention)",
                       "No. of significant genes (past. intervention)"),
           "value" = c(nrow(dat_scfaLookup),
                       nrow(dat_SCFAresults),
                       sum(dat_SCFAresults$freshInt_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_pvalue_correct < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of SCFA genes 193
... that didn't have low abundance 89
No. of significant genes (fresh intervention) 0
No. of significant genes (past. intervention) 2

… alternatively, looking at the raw p-values, not corrected for multiple testing

Code
data.frame("what"  = c("No. of SCFA genes",
                       "... that didn't have low abundance",
                       "No. of significant genes (fresh intervention)",
                       "No. of significant genes (past. intervention)"),
           "value" = c(nrow(dat_scfaLookup),
                       nrow(dat_SCFAresults),
                       sum(dat_SCFAresults$freshInt_pvalue < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_pvalue < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of SCFA genes 193
... that didn't have low abundance 89
No. of significant genes (fresh intervention) 4
No. of significant genes (past. intervention) 26
Overview which specific genes are significant (without multiple testing correction)
Code
dat_SCFAresults %>% 
  filter((freshInt_pvalue < 0.1) |
           (pastInt_pvalue < 0.1)) %>% 
  dplyr::select(-contains("correct")) %>% 
  mutate(across(contains("pvalue"), function(x) { ifelse(x < 0.1, x, "not significant") })) %>% 
  dplyr::select(SCFA, gene, contains("pvalue")) %>% 
  arrange(freshInt_pvalue, pastInt_pvalue) %>% 
  kable() %>% 
  kable_styling()
SCFA gene freshInt_pvalue pastInt_pvalue freshInt_ageLow_pvalue freshInt_ageHigh_pvalue freshInt_sexM_pvalue freshInt_sexW_pvalue freshInt_bmiLow_pvalue freshInt_bmiHigh_pvalue pastInt_ageLow_pvalue pastInt_ageHigh_pvalue pastInt_sexM_pvalue pastInt_sexW_pvalue pastInt_bmiLow_pvalue pastInt_bmiHigh_pvalue
Pyruvate K13290 0.0248 not significant not significant 0.0673 not significant 0.0911 not significant 0.027 not significant not significant not significant not significant not significant not significant
Pyruvate K13497 0.0297 0 0.0373 not significant not significant 0.0395 0.0162 not significant 0 not significant 0.0613 0.0001 0.0001 not significant
Pyruvate K01573 0.0783 not significant not significant 0.0168 0.046 not significant not significant not significant not significant not significant not significant not significant not significant not significant
Pyruvate K09022 0.0829 0.0075 not significant 0.0207 not significant 0.0409 0.0896 not significant 0.0106 not significant 0.0244 not significant 0.0184 not significant
Pyruvate K14155 not significant 0.0011 not significant not significant not significant not significant not significant not significant 0.0011 not significant 0.0056 0.0561 0.0227 0.0168
Pyruvate K05878 not significant 0.0034 not significant not significant not significant not significant not significant not significant 0.0014 not significant not significant 0.01 0.0177 0.0853
Pyruvate K01958 not significant 0.0048 not significant not significant not significant not significant not significant not significant 0.0044 not significant not significant 0.0015 0.0078 not significant
Butyryl-CoA K00249 not significant 0.0077 not significant not significant not significant not significant not significant not significant 0.0004 not significant 0.0897 0.04 0.0068 not significant
Pyruvate K00172 not significant 0.0084 not significant not significant not significant 0.0562 not significant not significant 0.018 not significant not significant 0.0009 0.0298 not significant
Acetate K01646 not significant 0.0097 not significant not significant not significant not significant not significant not significant not significant 0.0144 0.0277 not significant 0.0343 not significant
Acetate K01034 not significant 0.0102 not significant not significant not significant not significant not significant not significant 0.0029 not significant 0.0674 0.0702 0.0404 not significant
Butyryl-CoA K00209 not significant 0.0108 not significant not significant not significant not significant not significant not significant 0.0128 not significant not significant 0.0083 0.0194 not significant
Acetyl-CoA K00059 not significant 0.0131 not significant not significant 0.0992 not significant 0.0993 not significant 0.0583 not significant 0.0157 not significant 0.0065 not significant
Pyruvate K00170 not significant 0.0153 not significant not significant not significant 0.0493 not significant not significant 0.0469 not significant not significant 0.0038 0.0287 not significant
Pyruvate K01714 not significant 0.0157 not significant not significant not significant not significant not significant not significant 0.0883 0.0866 0.022 not significant 0.0279 not significant
Pyruvate K00382 not significant 0.0167 not significant not significant not significant not significant not significant not significant 0.0339 not significant not significant 0.0409 0.013 not significant
Acetyl-CoA K00640 not significant 0.0205 not significant not significant not significant not significant not significant not significant 0.0306 not significant 0.0222 not significant 0.0128 not significant
Acetate K01443 not significant 0.0221 not significant not significant not significant not significant not significant not significant 0.0422 not significant 0.0924 not significant 0.0214 not significant
Acetyl-CoA K04072 not significant 0.0302 not significant not significant not significant not significant 0.0968 not significant not significant not significant 0.0179 not significant 0.0459 not significant
Pyruvate K00171 not significant 0.0307 not significant not significant not significant not significant not significant not significant 0.0436 not significant not significant 0.0043 0.053 not significant
Acetyl-CoA K00140 not significant 0.0309 not significant not significant 0.0803 not significant not significant not significant not significant not significant 0.0485 not significant 0.012 not significant
Acetate K01739 not significant 0.0338 not significant 0.0054 not significant not significant not significant not significant not significant not significant 0.0665 not significant not significant not significant
Acetate K01060 not significant 0.0528 not significant not significant not significant not significant not significant not significant 0.0222 not significant not significant 0.0612 not significant not significant
Acetate K01644 not significant 0.053 not significant not significant 0.0654 not significant not significant not significant not significant not significant 0.0308 not significant 0.0683 not significant
Acetate K00925 not significant 0.0573 not significant not significant not significant not significant not significant not significant 0.044 not significant 0.0168 not significant 0.0973 not significant
Pyruvate K05879 not significant 0.0609 not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.02 0.0793 not significant
Pyruvate K03737 not significant 0.0664 not significant not significant not significant not significant not significant not significant not significant not significant 0.0698 not significant 0.0808 not significant
Acetate K01740 not significant 0.0939 not significant not significant not significant not significant not significant not significant 0.0538 not significant 0.0701 not significant 0.0925 not significant
Figures for publication
Code
# prepare the base dataset for the plot
dat_SCFAsignif <- dat_SCFAresults %>% 
  filter((freshInt_pvalue_correct < 0.1) | (pastInt_pvalue_correct < 0.1))
dat_plot <- data.frame(SCFA         = rep(dat_SCFAsignif$SCFA,  times = 2),
                       gene         = rep(dat_SCFAsignif$gene,  times = 2),
                       baseline_medianTPMAbd = rep(dat_SCFAsignif$baseline_medianTPMAbd, times = 2),
                       baseline_prevalence    = rep(dat_SCFAsignif$baseline_prevalence,    times = 2),
                       postFresh_medianTPMAbd = rep(dat_SCFAsignif$postFresh_medianTPMAbd, times = 2),
                       postFresh_prevalence   = rep(dat_SCFAsignif$postFresh_prevalence,   times = 2),
                       postPast_medianTPMAbd  = rep(dat_SCFAsignif$postPast_medianTPMAbd,  times = 2),
                       postPast_prevalence    = rep(dat_SCFAsignif$postPast_prevalence,    times = 2),
                       model        = rep(dat_SCFAsignif$model, times = 2),
                       intervention = rep(c("Fresh intervention", "Pasteurized intervention"), each = nrow(dat_SCFAsignif)),
                       effect       = c(dat_SCFAsignif$freshInt_effect, dat_SCFAsignif$pastInt_effect),
                       qvalue       = c(dat_SCFAsignif$freshInt_pvalue_correct, dat_SCFAsignif$pastInt_pvalue_correct)) %>% 
  arrange(SCFA, desc(effect)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model == "Gaussian") & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model == "Gaussian") & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model == "Gaussian") & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model == "Gaussian") & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model == "log-Gaussian") & (effect >= 0)                                ~ "+ <0.01%",
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model == "log-Gaussian") & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.\n(prevalence)",
                                pvalue_signif = "",
                                effect_label_TPMAbd = case_when(dat_gene$baseline_medianTPMAbd[1] < .0001 ~ "<0.0001 TPM",
                                                                dat_gene$baseline_medianTPMAbd[1] < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd[1], 4), " TPM"),
                                                                dat_gene$baseline_medianTPMAbd[1] < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd[1], 3), " TPM"),
                                                                dat_gene$baseline_medianTPMAbd[1] < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd[1], 2), " TPM"),
                                                                dat_gene$baseline_medianTPMAbd[1] < 10    ~ paste0(round(dat_gene$baseline_medianTPMAbd[1], 1), " TPM"),
                                                                TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd[1], 0), " TPM")),
                                effect_label_prevalence = case_when(dat_gene$baseline_prevalence[1] < 0.01 ~ paste0(round(100 * dat_gene$baseline_prevalence[1], 1), "%"),
                                                                    TRUE                                   ~ paste0(round(100 * dat_gene$baseline_prevalence[1], 0), "%"))) %>% 
                       mutate(effect_label = paste0(effect_label_TPMAbd, "\n(", effect_label_prevalence, ")"))) %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "After fresh median TPM abd.\n(prevalence)",
                                pvalue_signif = "",
                                effect_label_TPMAbd = case_when(dat_gene$postFresh_medianTPMAbd[1] < .0001 ~ "<0.0001 TPM",
                                                                dat_gene$postFresh_medianTPMAbd[1] < .01   ~ paste0(round(dat_gene$postFresh_medianTPMAbd[1], 4), " TPM"),
                                                                dat_gene$postFresh_medianTPMAbd[1] < .1    ~ paste0(round(dat_gene$postFresh_medianTPMAbd[1], 3), " TPM"),
                                                                dat_gene$postFresh_medianTPMAbd[1] < 1     ~ paste0(round(dat_gene$postFresh_medianTPMAbd[1], 2), " TPM"),
                                                                dat_gene$postFresh_medianTPMAbd[1] < 10    ~ paste0(round(dat_gene$postFresh_medianTPMAbd[1], 1), " TPM"),
                                                                TRUE ~ paste0(round(dat_gene$postFresh_medianTPMAbd[1], 0), " TPM")),
                                effect_label_prevalence = case_when(dat_gene$postFresh_prevalence[1] < 0.01 ~ paste0(round(100 * dat_gene$postFresh_prevalence[1], 1), "%"),
                                                                    TRUE                                    ~ paste0(round(100 * dat_gene$postFresh_prevalence[1], 0), "%"))) %>%
                       mutate(effect_label = paste0(effect_label_TPMAbd, "\n(", effect_label_prevalence, ")"))) %>%
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "After past. median TPM abd.\n(prevalence)",
                                pvalue_signif = "",
                                effect_label_TPMAbd = case_when(dat_gene$postPast_medianTPMAbd[1] < .0001 ~ "<0.0001 TPM",
                                                                dat_gene$postPast_medianTPMAbd[1] < .01   ~ paste0(round(dat_gene$postPast_medianTPMAbd[1], 4), " TPM"),
                                                                dat_gene$postPast_medianTPMAbd[1] < .1    ~ paste0(round(dat_gene$postPast_medianTPMAbd[1], 3), " TPM"),
                                                                dat_gene$postPast_medianTPMAbd[1] < 1     ~ paste0(round(dat_gene$postPast_medianTPMAbd[1], 2), " TPM"),
                                                                dat_gene$postPast_medianTPMAbd[1] < 10    ~ paste0(round(dat_gene$postPast_medianTPMAbd[1], 1), " TPM"),
                                                                TRUE ~ paste0(round(dat_gene$postPast_medianTPMAbd[1], 0), " TPM")),
                                effect_label_prevalence = case_when(dat_gene$postPast_prevalence[1] < 0.01 ~ paste0(round(100 * dat_gene$postPast_prevalence[1], 1), "%"),
                                                                    TRUE                                   ~ paste0(round(100 * dat_gene$postPast_prevalence[1], 0), "%"))) %>%
                       mutate(effect_label = paste0(effect_label_TPMAbd, "\n(", effect_label_prevalence, ")")))
})
dat_plot_overall <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.\n(prevalence)",
                                                        "After fresh median TPM abd.\n(prevalence)",
                                                        "After past. median TPM abd.\n(prevalence)",
                                                        "Fresh intervention", "Pasteurized intervention")),
         text_size    = case_when(grepl("intervention", intervention) ~ 4,
                                  TRUE                                ~ 3),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
dat_plot_overall %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA-related genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("functional_geneScreening_qvalues.png", width = 10, height = 3)
Code
# prepare the base dataset for the plot
dat_SCFAsignif <- dat_SCFAresults %>% 
  filter((freshInt_pvalue < 0.05) | (pastInt_pvalue < 0.05))
dat_plot <- data.frame(SCFA         = rep(dat_SCFAsignif$SCFA,  times = 2),
                       gene         = rep(dat_SCFAsignif$gene,  times = 2),
                       baseline_medianTPMAbd = rep(dat_SCFAsignif$baseline_medianTPMAbd, times = 2),
                       model        = rep(dat_SCFAsignif$model, times = 2),
                       intervention = rep(c("Fresh intervention", "Pasteurized intervention"), each = nrow(dat_SCFAsignif)),
                       effect       = c(dat_SCFAsignif$freshInt_effect, dat_SCFAsignif$pastInt_effect),
                       pvalue       = c(dat_SCFAsignif$freshInt_pvalue, dat_SCFAsignif$pastInt_pvalue)) %>% 
  arrange(SCFA, desc(effect)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & (intervention == "Fresh intervention") ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model == "Gaussian") & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model == "Gaussian") & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model == "Gaussian") & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model == "Gaussian") & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model == "log-Gaussian") & (effect >= 0)                                ~ "+ <0.01%",
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model == "log-Gaussian") & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention", "Pasteurized intervention")))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA-related genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS16_geneScreening_pvalues.png", width = 10, height = 7.5)

Results for interaction effects

Overview of the numbers of significant genes
Code
data.frame("what"  = c("No. of SCFA genes",
                       "... that didn't have low abundance",
                       "No. of significant genes (fresh intervention - age low)",
                       "No. of significant genes (fresh intervention - age high)",
                       "No. of significant genes (past. intervention - age low)",
                       "No. of significant genes (past. intervention - age high)",
                       "No. of significant genes (fresh intervention - gender male)",
                       "No. of significant genes (fresh intervention - gender female)",
                       "No. of significant genes (past. intervention - gender male)",
                       "No. of significant genes (past. intervention - gender female)",
                       "No. of significant genes (fresh intervention - BMI low)",
                       "No. of significant genes (fresh intervention - BMI high)",
                       "No. of significant genes (past. intervention - BMI low)",
                       "No. of significant genes (past. intervention - BMI high)"),
           "value" = c(nrow(dat_scfaLookup),
                       nrow(dat_SCFAresults),
                       sum(dat_SCFAresults$freshInt_ageLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_ageHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_ageLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_ageHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_sexM_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_sexW_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_sexM_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_sexW_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_bmiLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_bmiHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_bmiLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_bmiHigh_pvalue_correct < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of SCFA genes 193
... that didn't have low abundance 89
No. of significant genes (fresh intervention - age low) 0
No. of significant genes (fresh intervention - age high) 0
No. of significant genes (past. intervention - age low) 2
No. of significant genes (past. intervention - age high) 0
No. of significant genes (fresh intervention - gender male) 0
No. of significant genes (fresh intervention - gender female) 0
No. of significant genes (past. intervention - gender male) 0
No. of significant genes (past. intervention - gender female) 1
No. of significant genes (fresh intervention - BMI low) 0
No. of significant genes (fresh intervention - BMI high) 0
No. of significant genes (past. intervention - BMI low) 1
No. of significant genes (past. intervention - BMI high) 0
… alternatively looking at the uncorrected p-values
Code
data.frame("what"  = c("No. of SCFA genes",
                       "... that didn't have low abundance",
                       "No. of significant genes (fresh intervention - age low)",
                       "No. of significant genes (fresh intervention - age high)",
                       "No. of significant genes (past. intervention - age low)",
                       "No. of significant genes (past. intervention - age high)",
                       "No. of significant genes (fresh intervention - gender male)",
                       "No. of significant genes (fresh intervention - gender female)",
                       "No. of significant genes (past. intervention - gender male)",
                       "No. of significant genes (past. intervention - gender female)",
                       "No. of significant genes (fresh intervention - BMI low)",
                       "No. of significant genes (fresh intervention - BMI high)",
                       "No. of significant genes (past. intervention - BMI low)",
                       "No. of significant genes (past. intervention - BMI high)"),
           "value" = c(nrow(dat_scfaLookup),
                       nrow(dat_SCFAresults),
                       sum(dat_SCFAresults$freshInt_ageLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_ageHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_ageLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_ageHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_sexM_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_sexW_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_sexM_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_sexW_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_bmiLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$freshInt_bmiHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_bmiLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_SCFAresults$pastInt_bmiHigh_pvalue < 0.05, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of SCFA genes 193
... that didn't have low abundance 89
No. of significant genes (fresh intervention - age low) 1
No. of significant genes (fresh intervention - age high) 5
No. of significant genes (past. intervention - age low) 21
No. of significant genes (past. intervention - age high) 1
No. of significant genes (fresh intervention - gender male) 1
No. of significant genes (fresh intervention - gender female) 4
No. of significant genes (past. intervention - gender male) 12
No. of significant genes (past. intervention - gender female) 11
No. of significant genes (fresh intervention - BMI low) 2
No. of significant genes (fresh intervention - BMI high) 2
No. of significant genes (past. intervention - BMI low) 23
No. of significant genes (past. intervention - BMI high) 1

Age

Figures for publication
Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_ageLow_pvalue_correct < 0.1) | (freshInt_ageHigh_pvalue_correct < 0.1) |
           (pastInt_ageLow_pvalue_correct < 0.1) | (pastInt_ageHigh_pvalue_correct < 0.1))
dat_plot <- data.frame(SCFA          = rep(dat_signif$SCFA,  times = 4),
                       gene          = rep(dat_signif$gene,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       qvalue        = c(dat_signif$freshInt_ageLow_pvalue_correct, dat_signif$freshInt_ageHigh_pvalue_correct,
                                         dat_signif$pastInt_ageLow_pvalue_correct,  dat_signif$pastInt_ageHigh_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  arrange(SCFA, desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot_ageInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                                        "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
dat_plot_ageInt %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("functional_geneScreening_ageInteractions_qvalues.png", width = 12, height = 2)
Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05))
dat_plot <- data.frame(SCFA          = rep(dat_signif$SCFA,  times = 4),
                       gene          = rep(dat_signif$gene,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       pvalue        = c(dat_signif$freshInt_ageLow_pvalue, dat_signif$freshInt_ageHigh_pvalue,
                                         dat_signif$pastInt_ageLow_pvalue,  dat_signif$pastInt_ageHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  arrange(SCFA, desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = gsub(intervention, pattern = "age low",  replacement = "age 21-49"),
         intervention = gsub(intervention, pattern = "age high", replacement = "age 50-69"),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.",
                                                        "Fresh intervention\n(age 21-49)",
                                                        "Fresh intervention\n(age 50-69)",
                                                        "Pasteurized intervention\n(age 21-49)",
                                                        "Pasteurized intervention\n(age 50-69)")),
         SCFA         = case_when(SCFA == "Acetyl-CoA" ~ "ACoA",
                                  SCFA == "succinate"  ~ "Succ.",
                                  TRUE                 ~ SCFA))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS17_geneScreening_ageStrat_pvalues.png", width = 12, height = 9)

Sex

Figures for publication
Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_sexM_pvalue_correct < 0.1) | (freshInt_sexW_pvalue_correct < 0.1) |
           (pastInt_sexM_pvalue_correct < 0.1) | (pastInt_sexW_pvalue_correct < 0.1))
dat_plot <- data.frame(SCFA          = rep(dat_signif$SCFA,  times = 2),
                       gene          = rep(dat_signif$gene,  times = 2),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       qvalue        = c(dat_signif$freshInt_sexM_pvalue_correct, dat_signif$freshInt_sexW_pvalue_correct,
                                         dat_signif$pastInt_sexM_pvalue_correct,  dat_signif$pastInt_sexW_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  arrange(SCFA, desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot_sexInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                                        "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
dat_plot_sexInt %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("functional_geneScreening_sexInteractions_qvalues.png", width = 12, height = 1.5)
Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05))
dat_plot <- data.frame(SCFA          = rep(dat_signif$SCFA,  times = 2),
                       gene          = rep(dat_signif$gene,  times = 2),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       pvalue        = c(dat_signif$freshInt_sexM_pvalue, dat_signif$freshInt_sexW_pvalue,
                                         dat_signif$pastInt_sexM_pvalue,  dat_signif$pastInt_sexW_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  arrange(SCFA, desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = gsub(intervention, pattern = "gender ", replacement = ""),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.",
                                                        "Fresh intervention\n(male)",
                                                        "Fresh intervention\n(female)",
                                                        "Pasteurized intervention\n(male)",
                                                        "Pasteurized intervention\n(female)")))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS18_geneScreening_sexStrat_pvalues.png", width = 12, height = 9)

BMI

Figures for publication
Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_bmiLow_pvalue_correct < 0.1) | (freshInt_bmiHigh_pvalue_correct < 0.1) |
           (pastInt_bmiLow_pvalue_correct < 0.1) | (pastInt_bmiHigh_pvalue_correct < 0.1))
dat_plot <- data.frame(SCFA          = rep(dat_signif$SCFA,  times = 2),
                       gene          = rep(dat_signif$gene,  times = 2),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       qvalue        = c(dat_signif$freshInt_bmiLow_pvalue_correct, dat_signif$freshInt_bmiHigh_pvalue_correct,
                                         dat_signif$pastInt_bmiLow_pvalue_correct,  dat_signif$pastInt_bmiHigh_pvalue_correct),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  arrange(SCFA, desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((qvalue < 0.1) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (qvalue < 0.1) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (qvalue < 0.1) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (qvalue < 0.1) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot_bmiInt <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                                        "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)")),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant",
                                                          "significant pos. effect (fresh intervention)",
                                                          "significant neg. effect (fresh intervention)",
                                                          "significant pos. effect (pasteurized intervention)",
                                                          "significant neg. effect (pasteurized intervention)",
                                                          "")))

# plot
dat_plot_bmiInt %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "significant pos. effect (fresh intervention)" = "#7cbde9",
                                          "significant neg. effect (fresh intervention)" = "#1F78B4",
                                          "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                          "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("functional_geneScreening_bmiInteractions_qvalues.png", width = 12, height = 1.5)
Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05))
dat_plot <- data.frame(SCFA          = rep(dat_signif$SCFA,  times = 2),
                       gene          = rep(dat_signif$gene,  times = 2),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       pvalue        = c(dat_signif$freshInt_bmiLow_pvalue, dat_signif$freshInt_bmiHigh_pvalue,
                                         dat_signif$pastInt_bmiLow_pvalue,  dat_signif$pastInt_bmiHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  arrange(SCFA, desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(SCFA          = dat_gene$SCFA[1],
                                gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = gsub(intervention, pattern = "low",  replacement = "[18, 25)"),
         intervention = gsub(intervention, pattern = "high", replacement = "[25, 31]"),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.",
                                                        "Fresh intervention\n(BMI [18, 25))",
                                                        "Fresh intervention\n(BMI [25, 31])",
                                                        "Pasteurized intervention\n(BMI [18, 25))",
                                                        "Pasteurized intervention\n(BMI [25, 31])")))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on SCFA genes") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS19_geneScreening_bmiStrat_pvalues.png", width = 12, height = 9)

Joint heatmap of all interaction effects

… of uncorrected p-values

Create the age interaction data

… same as for the above plot, but also including species that showed significances in sex or BMI interactions

Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05))
dat_plot <- data.frame(gene          = rep(dat_signif$gene,  times = 4),
                       SCFA          = rep(dat_signif$SCFA,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       pvalue        = c(dat_signif$freshInt_ageLow_pvalue, dat_signif$freshInt_ageHigh_pvalue,
                                         dat_signif$pastInt_ageLow_pvalue,  dat_signif$pastInt_ageHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                SCFA          = dat_gene$SCFA[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot_ageIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                                        "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the sex interaction data

… same as for the above plot, but also including species that showed significances in age or BMI interactions

Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05))
dat_plot <- data.frame(gene          = rep(dat_signif$gene,  times = 4),
                       SCFA          = rep(dat_signif$SCFA,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       pvalue        = c(dat_signif$freshInt_sexM_pvalue, dat_signif$freshInt_sexW_pvalue,
                                         dat_signif$pastInt_sexM_pvalue,  dat_signif$pastInt_sexW_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                SCFA          = dat_gene$SCFA[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot_sexIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                                        "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the BMI interaction data

… same as for the above plot, but also including species that showed significances in age or sex interactions

Code
# prepare the base dataset for the plot
dat_signif <- dat_SCFAresults %>% 
  filter((freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05) |
           (freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05) |
           (freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05))
dat_plot <- data.frame(gene          = rep(dat_signif$gene,  times = 4),
                       SCFA          = rep(dat_signif$SCFA,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       pvalue        = c(dat_signif$freshInt_bmiLow_pvalue, dat_signif$freshInt_bmiHigh_pvalue,
                                         dat_signif$pastInt_bmiLow_pvalue,  dat_signif$pastInt_bmiHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) & (effect > 0) ~ "significant pos. effect (fresh intervention)",
                                   (pvalue < 0.05) & grepl("Fresh", intervention) & (effect < 0) ~ "significant neg. effect (fresh intervention)",
                                   (pvalue < 0.05) & (effect > 0) ~ "significant pos. effect (pasteurized intervention)",
                                   (pvalue < 0.05) & (effect < 0) ~ "significant neg. effect (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 PP",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " PP"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 PP",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$gene)), function(gen) {
  dat_gene <- dat_plot %>% filter(gene == gen)
  dat_gene %>% 
    dplyr::bind_rows(data.frame(gene          = gen,
                                intervention  = "Baseline median TPM abd.",
                                SCFA          = dat_gene$SCFA[1],
                                pvalue_signif = "",
                                effect_label  = case_when(dat_gene$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_gene$baseline_medianTPMAbd < .001  ~ paste0(round(dat_gene$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .01   ~ paste0(round(dat_gene$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < .1    ~ paste0(round(dat_gene$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_gene$baseline_medianTPMAbd < 1     ~ paste0(round(dat_gene$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_gene$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot_bmiIntp <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(gene         = factor(gene, levels = rev(unique(gene))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                                        "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)")),
         text_highlight = case_when(grepl("Baseline", intervention) & (effect_label == "<0.0001%") ~ "no",
                                    pvalue_signif == "not significant" ~ "no",
                                    TRUE ~ "yes"))
create the joint plot
Code
# plots
gg_heatmap_ageInt <- dat_plot_ageIntp %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text = element_blank())
gg_heatmap_sexInt <- dat_plot_sexIntp %>% 
  filter(!grepl("Baseline", intervention)) %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  ggtitle("Intervention effects on species") +
  theme(axis.title  = element_blank(),
        axis.text.y = element_blank(),
        panel.grid  = element_blank(),
        strip.text  = element_blank())
gg_heatmap_bmiInt <- dat_plot_bmiIntp %>% 
  filter(!grepl("Baseline", intervention)) %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant pos. effect (fresh intervention)" = "#7cbde9",
                                                      "significant neg. effect (fresh intervention)" = "#1F78B4",
                                                      "significant pos. effect (pasteurized intervention)" = "#ffb366",
                                                      "significant neg. effect (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  ggtitle("Intervention effects on species") +
  facet_grid(rows = vars(SCFA), scales = "free", space = "free") +
  theme(axis.title  = element_blank(),
        axis.text.y = element_blank(),
        panel.grid  = element_blank())

# align the plots
(gg_heatmap_ageInt + gg_heatmap_sexInt + gg_heatmap_bmiInt) +
  patchwork::plot_layout(widths = c(.36, .32, .32), nrow = 1, guides = "collect")

Code
# ggsave("functional_geneScreening_allInteractions_pvalues.png", width = 23, height = 20)

Joint plot of condensed information

Focus on the four biggest phylum groups: Firmicutes, Bacteroidetes, Actinobacteria and Proteobacteria

Code
# data preparation
plot_dat_wide <- dat_SCFAresults %>% 
  group_by(SCFA) %>% 
  dplyr::summarize(n_genes                      = n(),
                   n_freshSignifEffects_overall = sum(freshInt_pvalue         < 0.05),
                   n_freshSignifEffects_ageLow  = sum(freshInt_ageLow_pvalue  < 0.05),
                   n_freshSignifEffects_ageHigh = sum(freshInt_ageHigh_pvalue < 0.05),
                   n_freshSignifEffects_sexM    = sum(freshInt_sexM_pvalue    < 0.05),
                   n_freshSignifEffects_sexW    = sum(freshInt_sexW_pvalue    < 0.05),
                   n_freshSignifEffects_bmiLow  = sum(freshInt_bmiLow_pvalue  < 0.05),
                   n_freshSignifEffects_bmiHigh = sum(freshInt_bmiHigh_pvalue < 0.05),
                   n_pastSignifEffects_overall  = sum(pastInt_pvalue          < 0.05),
                   n_pastSignifEffects_ageLow   = sum(pastInt_ageLow_pvalue   < 0.05),
                   n_pastSignifEffects_ageHigh  = sum(pastInt_ageHigh_pvalue  < 0.05),
                   n_pastSignifEffects_sexM     = sum(pastInt_sexM_pvalue     < 0.05),
                   n_pastSignifEffects_sexW     = sum(pastInt_sexW_pvalue     < 0.05),
                   n_pastSignifEffects_bmiLow   = sum(pastInt_bmiLow_pvalue   < 0.05),
                   n_pastSignifEffects_bmiHigh  = sum(pastInt_bmiHigh_pvalue  < 0.05))


# reshape dataset to long format
plot_dat_long <- data.frame(SCFA                 = rep(plot_dat_wide$SCFA, times = sum(grepl("freshSignifEffects", colnames(plot_dat_wide)))),
                            n_genes              = rep(plot_dat_wide$n_genes, times = sum(grepl("freshSignifEffects", colnames(plot_dat_wide)))),
                            effect_group         = rep(c("overall", "ageInt", "ageInt", "sexInt", "sexInt", "bmiInt", "bmiInt"),
                                                       each = nrow(plot_dat_wide)),
                            effect_type          = rep(c("overall", "ageLow", "ageHigh", "sexM", "sexW", "bmiLow", "bmiHigh"),
                                                       each = nrow(plot_dat_wide)),
                            n_freshSignifEffects = c(plot_dat_wide$n_freshSignifEffects_overall,
                                                     plot_dat_wide$n_freshSignifEffects_ageLow,
                                                     plot_dat_wide$n_freshSignifEffects_ageHigh,
                                                     plot_dat_wide$n_freshSignifEffects_sexM,
                                                     plot_dat_wide$n_freshSignifEffects_sexW,
                                                     plot_dat_wide$n_freshSignifEffects_bmiLow,
                                                     plot_dat_wide$n_freshSignifEffects_bmiHigh),
                            n_pastSignifEffects  = c(plot_dat_wide$n_pastSignifEffects_overall,
                                                     plot_dat_wide$n_pastSignifEffects_ageLow,
                                                     plot_dat_wide$n_pastSignifEffects_ageHigh,
                                                     plot_dat_wide$n_pastSignifEffects_sexM,
                                                     plot_dat_wide$n_pastSignifEffects_sexW,
                                                     plot_dat_wide$n_pastSignifEffects_bmiLow,
                                                     plot_dat_wide$n_pastSignifEffects_bmiHigh)) %>% 
  mutate(SCFA_label   = paste0(SCFA, "\n", n_genes, " genes"),
         effect_group = factor(effect_group, levels = c("overall", "ageInt", "sexInt", "bmiInt")),
         effect_type  = factor(effect_type, levels = rev(c("overall", "ageLow", "ageHigh", "sexM", "sexW", "bmiLow", "bmiHigh"))))

# make the dataset even longer
plot_dat_longer <- data.frame(SCFA         = rep(plot_dat_long$SCFA,         times = 2),
                              n_genes      = rep(plot_dat_long$n_genes,      times = 2),
                              SCFA_label   = rep(plot_dat_long$SCFA_label,   times = 2),
                              effect_group = rep(plot_dat_long$effect_group, times = 2),
                              effect_type  = rep(plot_dat_long$effect_type,  times = 2),
                              intervention = rep(c("Fresh", "Past."), each = nrow(plot_dat_long)),
                              parameter    = "n_signifEffects",
                              value        = c(plot_dat_long$n_freshSignifEffects,
                                               plot_dat_long$n_pastSignifEffects)) %>% 
  group_by(SCFA, parameter, effect_group, intervention) %>% 
  mutate(alpha_value = case_when(effect_group == "overall"  ~ 0.8,
                                 value == min(value)        ~ 0,
                                 TRUE                       ~ 1)) %>% 
  ungroup() %>% 
  mutate(fill_tiles = case_when(effect_type == "overall" ~ "no",
                                TRUE ~ SCFA),
         alpha_tiles = case_when(effect_type %in% c("ageLow", "sexM", "bmiLow") ~ 0.05,
                                 TRUE ~ 0.15))
plot_dat_longer <- plot_dat_longer %>% mutate(SCFA_label = factor(SCFA_label, levels = plot_dat_longer %>% arrange(desc(n_genes)) %>% 
                                                                    pull(SCFA_label) %>% unique()))


# create a color vector for color coding the interaction terms
col_vector <- RColorBrewer::brewer.pal(6, name = "Dark2")

# plot
ggplot(plot_dat_longer, aes(x = intervention, y = effect_type)) +
  geom_tile(aes(fill = fill_tiles, alpha = alpha_tiles)) +
  geom_text(aes(label = value, alpha = alpha_value)) +
  facet_grid(effect_group ~ SCFA_label, scales = "free") +
  scale_fill_manual(values = c("no"             = "white",
                               "Pyruvate"       = col_vector[1],
                               "Acetyl-CoA"     = col_vector[2],
                               "Acetate"        = col_vector[3],
                               "succinate"      = col_vector[4],
                               "Butyryl-CoA"    = col_vector[5],
                               "propionate"     = col_vector[6])) +
  theme(axis.title      = element_blank(),
        panel.grid      = element_blank(),
        legend.position = "none",
        strip.background.x = element_rect(fill = "gray97", color = "gray97"),
        strip.text.y    = element_blank())

Code
# ggsave("functional_geneScreening_allInteractions_table.png", width = 7, height = 3.5)

Main joint SCFA gene figure for the publication

Code
# 1) plot of overall effects
gg_overall <- dat_plot_overall %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  ggplot(aes(x = intervention, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label, size = text_size)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  scale_size(range = c(3, 4), guide = "none") +
  labs(tag = "(A)") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(hjust = 0.5),
        plot.tag.location = "plot",
        plot.tag          = element_text(size = 18, margin = margin(t = -30)),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position = "none")

# 1.5) create the legend separately, also to ensure that all color categories are plotted
category_labels <- c("not significant","pos. effect (fresh)","neg. effect (fresh)",
                     "pos. effect (past.)","neg. effect (past.)")
gg_base    <- data.frame(x       = rnorm(n = 5),
                         y       = rnorm(n = 5),
                         col_var = factor(category_labels, levels = category_labels)) %>% 
  ggplot(aes(x = x, y = y, fill = col_var)) +
  geom_tile() +
  scale_fill_manual("q-value", values = c("not significant"     = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")
gg_legend  <- ggpubr::get_legend(gg_base) %>% ggpubr::as_ggplot()


# 2) plot of age-specific effects
gg_age <- dat_plot_ageInt %>% 
  filter(intervention != "Baseline median TPM abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "age low",  replacement = "age 21-49"),
         intervention2 = gsub(intervention2, pattern = "age high", replacement = "age 50-69"),
         intervention3 = gsub(intervention,  pattern = "\n\\(age low\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(age high\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  guides(fill = guide_legend(nrow = 1)) +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position  = "none")


# 3) plot of gender-specific effects
gg_sex <- dat_plot_sexInt %>% 
  filter(intervention != "Baseline median TPM abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "gender ",  replacement = ""),
         intervention2 = factor(intervention2, levels = c("male", "female")),
         intervention3 = gsub(intervention,  pattern = "\n\\(gender male\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(gender female\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position = "none")

# 4) plot of BMI-specific effects
gg_bmi <- dat_plot_bmiInt %>% 
  filter(intervention != "Baseline median TPM abd.") %>% 
  mutate(pvalue_signif = gsub(pvalue_signif, pattern = "significant ",  replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = " intervention", replacement = ""),
         pvalue_signif = gsub(pvalue_signif, pattern = "pasteurized",   replacement = "past."),
         pvalue_signif = factor(pvalue_signif, levels = c("not significant", "pos. effect (fresh)",
                                                          "neg. effect (fresh)", "pos. effect (past.)",
                                                          "neg. effect (past.)", ""))) %>% 
  mutate(intervention2 = gsub(intervention,  pattern = "Fresh intervention\n\\(",       replacement = ""),
         intervention2 = gsub(intervention2, pattern = "Pasteurized intervention\n\\(", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "\\)", replacement = ""),
         intervention2 = gsub(intervention2, pattern = "BMI low",  replacement = "BMI [18, 25)"),
         intervention2 = gsub(intervention2, pattern = "BMI high", replacement = "BMI [25, 31]"),
         intervention3 = gsub(intervention,  pattern = "\n\\(BMI low\\)",  replacement = ""),
         intervention3 = gsub(intervention3, pattern = "\n\\(BMI high\\)", replacement = "")) %>%
  ggplot(aes(x = intervention2, y = gene)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("q-value", values = c("not significant" = "gray95",
                                          "pos. effect (fresh)" = "#7cbde9",
                                          "neg. effect (fresh)" = "#1F78B4",
                                          "pos. effect (past.)" = "#ffb366",
                                          "neg. effect (past.)" = "#FF7F00"),
                    na.value = "white") +
  # facet_grid(rows = vars(phylum), cols = vars(intervention3), scales = "free", space = "free") +
  facet_grid(cols = vars(intervention3), scales = "free", space = "free") +
  # ggtitle("BMI-specific effects") +
  theme_minimal(base_size = 14) +
  theme(plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.text.x     = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"),
        legend.position = "none")

# 5) plot of the overview table of the uncorrected p-value tendencies
gg_table <- plot_dat_longer %>% 
  mutate(effect_type = case_when(effect_type == "ageLow"  ~ "age 21-49",
                                 effect_type == "ageHigh" ~ "age 50-69",
                                 effect_type == "sexM"    ~ "male",
                                 effect_type == "sexW"    ~ "female",
                                 effect_type == "bmiLow"  ~ "BMI [18, 25)",
                                 effect_type == "bmiHigh" ~ "BMI [25, 31]",
                                 TRUE                     ~ effect_type),
         effect_type = factor(effect_type, levels = c("overall",
                                                      "age 50-69", "age 21-49",
                                                      "female", "male",
                                                      "BMI [25, 31]", "BMI [18, 25)"))) %>% 
  ggplot(aes(x = intervention, y = effect_type)) +
  geom_tile(aes(fill = fill_tiles, alpha = alpha_tiles)) +
  geom_text(aes(label = value, alpha = alpha_value)) +
  facet_grid(effect_group ~ SCFA, scales = "free") +
  scale_fill_manual(values = c("no"             = "white",
                               "Pyruvate"       = col_vector[1],
                               "Acetyl-CoA"     = col_vector[2],
                               "Acetate"        = col_vector[3],
                               "succinate"      = col_vector[4],
                               "Butyryl-CoA"    = col_vector[5],
                               "propionate"     = col_vector[6])) +
  theme_minimal(base_size = 13) +
  theme(plot.title      = element_text(hjust = 0.5),
        axis.title      = element_blank(),
        panel.grid      = element_blank(),
        legend.position = "none",
        strip.background.x = element_rect(fill = "gray97", color = "gray97"),
        strip.text.y    = element_blank())


# 6) small plot just for adding some in between titles to the joint plot
text_dat <- data.frame(x = 0.5,
                       y = 0.6,
                       label = c("Stratified effects", "Summary of trends"))
gg_header_base <- ggplot() +
  geom_hline(yintercept = 0) +
  xlim(c(0,1)) + ylim(c(0,1)) +
  theme_minimal(base_size = 14) +
  theme(panel.grid        = element_blank(),
        plot.tag.location = "plot",
        plot.tag          = element_text(size = 18, margin = margin(t = -5)),
        axis.text         = element_blank(),
        axis.title        = element_blank(),
        plot.margin       = margin(t = 0, b = 0, l = 0, r = 0),
        legend.position   = "none")
gg_headerA <- gg_header_base +
  geom_text(data = text_dat %>% dplyr::slice(1), aes(x = x, y = y, label = label), size = 5) +
  labs(tag = "(B)")
gg_headerB <- gg_header_base +
  geom_text(data = text_dat %>% dplyr::slice(2), aes(x = x, y = y, label = label), size = 5) +
  labs(tag = "(C)")

# 7) joint plot
layout <- "
AA
BB
CD
EF
EI
GI
HI
HJ
"
(gg_overall + gg_legend + gg_headerA + gg_headerB + gg_age + ggplot() + gg_sex + gg_bmi + gg_table) +
  patchwork::plot_layout(ncol = 2, heights = c(.9,.7,.2,.1,.9,.75,.45,0), design = layout) +
  patchwork::plot_annotation(title = "Overall effects on SCFA gene abundance",
                             theme    = theme(plot.title    = element_text(size = 16)))

Code
# ggsave("Figure6_geneScreening.png", width = 13, height = 7.5)

KO Results (genes)

Available information
Code
dat_KOresults %>% 
  head(5) %>% 
  kable() %>% 
  kable_styling()
SCFA gene relFreq_zeroTPM relFreq_min10TPM min_tpmAbd mean_tpmAbd median_tpmAbd max_tpmAbd baseline_medianTPMAbd baseline_prevalence postFresh_medianTPMAbd postFresh_prevalence postPast_medianTPMAbd postPast_prevalence model model_deviance model_meanAbsResid freshInt_effect freshInt_se freshInt_pvalue pastInt_effect pastInt_se pastInt_pvalue phaseEffect genderEffect ageEffect bmiEffect freshInt_ageLow_effect freshInt_ageHigh_effect freshInt_ageLow_se freshInt_ageHigh_se freshInt_ageLow_pvalue freshInt_ageHigh_pvalue freshInt_sexM_effect freshInt_sexW_effect freshInt_sexM_se freshInt_sexW_se freshInt_sexM_pvalue freshInt_sexW_pvalue freshInt_bmiLow_effect freshInt_bmiHigh_effect freshInt_bmiLow_se freshInt_bmiHigh_se freshInt_bmiLow_pvalue freshInt_bmiHigh_pvalue pastInt_ageLow_effect pastInt_ageHigh_effect pastInt_ageLow_se pastInt_ageHigh_se pastInt_ageLow_pvalue pastInt_ageHigh_pvalue pastInt_sexM_effect pastInt_sexW_effect pastInt_sexM_se pastInt_sexW_se pastInt_sexM_pvalue pastInt_sexW_pvalue pastInt_bmiLow_effect pastInt_bmiHigh_effect pastInt_bmiLow_se pastInt_bmiHigh_se pastInt_bmiLow_pvalue pastInt_bmiHigh_pvalue freshInt_pvalue_correct pastInt_pvalue_correct freshInt_ageLow_pvalue_correct pastInt_ageLow_pvalue_correct freshInt_ageHigh_pvalue_correct pastInt_ageHigh_pvalue_correct freshInt_sexM_pvalue_correct pastInt_sexM_pvalue_correct freshInt_sexW_pvalue_correct pastInt_sexW_pvalue_correct freshInt_bmiLow_pvalue_correct pastInt_bmiLow_pvalue_correct freshInt_bmiHigh_pvalue_correct pastInt_bmiHigh_pvalue_correct
NA K00003 0 1.0000 53.2414 194.2078 197.5625 248.4660 198.6070 1.0000 196.0480 1.0000 194.9850 1.0000 Gaussian 0.4833 NA -2.6504 2.9109 0.3633 -3.7757 2.9223 0.1974 4.2762 -3.9392 0.0715 1.2181 -0.3362 -5.4444 3.9499 4.3365 0.9322 0.2104 -4.6378 -1.1353 4.4394 3.8620 0.2971 0.7690 -7.1213 4.9448 3.6536 4.7583 0.0523 0.2996 -2.0794 -5.8683 3.9499 4.3744 0.5990 0.1809 -7.6452 -0.8076 4.4394 3.8888 0.0862 0.8356 -5.8390 -0.2335 3.6536 4.8091 0.1111 0.9613 0.6955 0.5868 0.9891 0.8941 0.7660 0.7617 0.8427 0.6975 0.9531 0.9680 0.5434 0.6151 0.7640 0.9917
NA K00005 0 0.9942 6.2805 34.0300 31.6354 81.7872 31.8760 1.0000 31.7015 1.0000 31.0494 0.9765 Gaussian 0.7267 NA -0.0439 1.0534 0.9668 -0.5992 1.0576 0.5715 1.4246 -2.7774 -0.1163 1.0979 0.2574 -0.4146 1.4302 1.5698 0.8573 0.7919 -0.4380 0.2641 1.5951 1.3872 0.7838 0.8492 -1.0012 1.5823 1.3291 1.7309 0.4519 0.3615 0.1993 -1.5789 1.4302 1.5835 0.8893 0.3197 -3.2757 1.4534 1.5951 1.3968 0.0410 0.2991 -1.2116 0.4565 1.3291 1.7494 0.3628 0.7943 0.9869 0.8033 0.9721 0.9798 0.9548 0.7909 0.9550 0.5777 0.9693 0.8429 0.8442 0.7947 0.7941 0.9621
NA K00008 0 0.7018 2.2753 15.1554 13.2774 73.5507 13.4150 0.6842 13.5326 0.7442 13.1593 0.6941 Gaussian 0.6964 NA 0.5104 0.7080 0.4716 0.1041 0.7108 0.8836 0.4277 -1.7522 -0.0495 0.2024 0.8551 0.0871 0.9601 1.0538 0.3739 0.9342 1.5990 -0.3087 1.0737 0.9338 0.1376 0.7412 -0.6819 2.5325 0.8869 1.1550 0.4427 0.0292 0.8358 -0.7928 0.9601 1.0631 0.3848 0.4565 -0.7072 0.7265 1.0737 0.9402 0.5107 0.4404 -0.1889 0.6042 0.8869 1.1674 0.8315 0.6052 0.7542 0.9539 0.8105 0.8125 0.9899 0.8437 0.7734 0.8918 0.9469 0.8817 0.8403 0.9672 0.4652 0.9057
NA K00009 0 0.9766 5.2603 31.6988 28.6944 115.4810 27.5855 0.9708 28.3442 1.0000 31.7631 0.9647 Gaussian 0.6133 NA 0.8711 1.4752 0.5553 5.4678 1.4810 0.0003 0.6309 -5.3788 -0.1301 -0.5970 1.0875 0.5791 1.9893 2.1838 0.5851 0.7911 0.9690 0.7975 2.2563 1.9625 0.6679 0.6848 2.6018 -2.0721 1.8531 2.4133 0.1615 0.3913 8.1256 2.2125 1.9893 2.2028 0.0001 0.3161 5.3247 5.5778 2.2563 1.9761 0.0190 0.0051 7.3021 2.2959 1.8531 2.4391 0.0001 0.3474 0.7934 0.0552 0.8873 0.0930 0.9546 0.7909 0.9324 0.4766 0.9344 0.4072 0.6656 0.0543 0.8100 0.7860
NA K00010 0 0.9883 3.8333 34.7802 31.5682 111.9080 31.6634 0.9825 30.0142 0.9884 33.0006 1.0000 log-Gaussian 0.5907 NA -0.0595 0.0423 0.1608 0.0093 0.0425 0.8264 0.0200 -0.0238 -0.0019 -0.0070 -0.0395 -0.0844 0.0574 0.0630 0.4921 0.1815 -0.0826 -0.0421 0.0647 0.0563 0.2032 0.4552 -0.0790 -0.0266 0.0535 0.0697 0.1415 0.7031 0.0590 -0.0515 0.0574 0.0635 0.3043 0.4181 -0.0049 0.0203 0.0647 0.0567 0.9394 0.7210 0.0049 0.0169 0.0535 0.0705 0.9272 0.8109 0.5545 0.9283 0.8563 0.7870 0.7619 0.8259 0.8127 0.9891 0.8829 0.9417 0.6483 0.9871 0.9367 0.9642
Number of significant genes
Code
dat_tab <- dat_KOresults %>% 
  mutate(fresh_decision = case_when(freshInt_pvalue_correct < 0.1 ~ "fresh intervention effect significant",
                                    TRUE                           ~ "fresh intervention effect not significant"),
         past_decision  = case_when(pastInt_pvalue_correct < 0.1  ~ "past. intervention effect significant",
                                    TRUE                           ~ "past. intervention effect not significant"))

table(dat_tab$fresh_decision, dat_tab$past_decision) %>% 
  kable() %>% 
  kable_styling()
past. intervention effect not significant past. intervention effect significant
fresh intervention effect not significant 2710 75
Significant genes regarding fresh intervention
Code
dat_KOresults %>% 
  filter(freshInt_pvalue_correct < 0.1) %>% 
  arrange(freshInt_pvalue_correct) %>% 
  dplyr::select(-starts_with("pastInt")) %>% 
  kable() %>% 
  kable_styling()
SCFA gene relFreq_zeroTPM relFreq_min10TPM min_tpmAbd mean_tpmAbd median_tpmAbd max_tpmAbd baseline_medianTPMAbd baseline_prevalence postFresh_medianTPMAbd postFresh_prevalence postPast_medianTPMAbd postPast_prevalence model model_deviance model_meanAbsResid freshInt_effect freshInt_se freshInt_pvalue phaseEffect genderEffect ageEffect bmiEffect freshInt_ageLow_effect freshInt_ageHigh_effect freshInt_ageLow_se freshInt_ageHigh_se freshInt_ageLow_pvalue freshInt_ageHigh_pvalue freshInt_sexM_effect freshInt_sexW_effect freshInt_sexM_se freshInt_sexW_se freshInt_sexM_pvalue freshInt_sexW_pvalue freshInt_bmiLow_effect freshInt_bmiHigh_effect freshInt_bmiLow_se freshInt_bmiHigh_se freshInt_bmiLow_pvalue freshInt_bmiHigh_pvalue freshInt_pvalue_correct freshInt_ageLow_pvalue_correct freshInt_ageHigh_pvalue_correct freshInt_sexM_pvalue_correct freshInt_sexW_pvalue_correct freshInt_bmiLow_pvalue_correct freshInt_bmiHigh_pvalue_correct
Significant genes regarding pasteurized intervention
Code
dat_KOresults %>% 
  filter(pastInt_pvalue_correct < 0.1) %>% 
  arrange(pastInt_pvalue_correct) %>% 
  dplyr::select(-starts_with("freshInt")) %>% 
  kable() %>% 
  kable_styling()
SCFA gene relFreq_zeroTPM relFreq_min10TPM min_tpmAbd mean_tpmAbd median_tpmAbd max_tpmAbd baseline_medianTPMAbd baseline_prevalence postFresh_medianTPMAbd postFresh_prevalence postPast_medianTPMAbd postPast_prevalence model model_deviance model_meanAbsResid pastInt_effect pastInt_se pastInt_pvalue phaseEffect genderEffect ageEffect bmiEffect pastInt_ageLow_effect pastInt_ageHigh_effect pastInt_ageLow_se pastInt_ageHigh_se pastInt_ageLow_pvalue pastInt_ageHigh_pvalue pastInt_sexM_effect pastInt_sexW_effect pastInt_sexM_se pastInt_sexW_se pastInt_sexM_pvalue pastInt_sexW_pvalue pastInt_bmiLow_effect pastInt_bmiHigh_effect pastInt_bmiLow_se pastInt_bmiHigh_se pastInt_bmiLow_pvalue pastInt_bmiHigh_pvalue pastInt_pvalue_correct pastInt_ageLow_pvalue_correct pastInt_ageHigh_pvalue_correct pastInt_sexM_pvalue_correct pastInt_sexW_pvalue_correct pastInt_bmiLow_pvalue_correct pastInt_bmiHigh_pvalue_correct
NA K01424 0.0000 1.0000 33.1424 120.3046 121.1000 208.8170 122.1230 1.0000 122.9770 1.0000 117.0730 1.0000 Gaussian 0.5753 NA -9.2653 2.3022 0.0001 -1.0465 9.1344 0.1137 1.5943 -11.2428 -6.8484 3.1028 3.4360 0.0003 0.0472 -11.5519 -7.5103 3.5006 3.0661 0.0011 0.0149 -10.2908 -7.4894 2.9001 3.8173 0.0005 0.0508 0.0368 0.2091 0.7130 0.3278 0.4518 0.1038 0.5399
NA K01777 0.0029 0.1988 0.0000 7.8345 7.4021 35.8471 7.5195 0.2456 7.4168 0.1860 6.7787 0.1176 Gaussian 0.7542 NA -1.3442 0.3130 0.0000 0.0508 0.7071 0.0340 0.3659 -1.1177 -1.6222 0.4232 0.4686 0.0088 0.0006 -1.7717 -1.0165 0.4756 0.4165 0.0002 0.0153 -1.2815 -1.4532 0.3945 0.5193 0.0013 0.0055 0.0368 0.6399 0.2587 0.2467 0.4544 0.1744 0.3266
NA K01814 0.0000 1.0000 19.9867 82.8668 82.9059 138.9620 84.4132 1.0000 83.4656 1.0000 79.7166 1.0000 Gaussian 0.6235 NA -5.7533 1.4020 0.0001 0.8560 0.7966 -0.0703 0.6738 -7.5910 -3.5012 1.8907 2.0936 0.0001 0.0956 -5.8871 -5.6507 2.1312 1.8665 0.0061 0.0027 -6.3663 -4.6849 1.7642 2.3222 0.0004 0.0447 0.0368 0.1078 0.7321 0.4072 0.4004 0.0911 0.5144
NA K02240 0.0029 0.1608 0.0000 5.8848 3.8797 51.7109 4.0799 0.1813 3.7860 0.1628 3.4472 0.1176 log-Gaussian 0.7970 NA -0.1774 0.0449 0.0001 0.1207 -0.1426 -0.0007 0.0270 -0.1938 -0.1573 0.0608 0.0673 0.0016 0.0202 -0.2426 -0.1274 0.0682 0.0597 0.0004 0.0337 -0.2289 -0.0887 0.0561 0.0738 0.0001 0.2306 0.0368 0.3383 0.6846 0.2467 0.5466 0.0467 0.7110
NA K02800 0.0000 0.9854 8.3409 34.3059 31.4380 99.9480 29.1504 0.9942 31.3454 0.9651 37.0330 0.9882 Gaussian 0.6669 NA 6.1388 1.4635 0.0000 1.3081 -6.8307 -0.0540 0.0161 9.4735 2.0536 1.9572 2.1672 0.0000 0.3442 5.1728 6.8796 2.2279 1.9511 0.0210 0.0005 7.8027 3.2534 1.8360 2.4167 0.0000 0.1794 0.0368 0.0246 0.8022 0.4836 0.2637 0.0459 0.6782
NA K03483 0.0000 0.8655 1.3906 22.1851 19.6555 84.4915 18.1234 0.8655 19.1580 0.8488 22.5557 0.8824 log-Gaussian 0.6403 NA 0.2027 0.0502 0.0001 0.0329 -0.2000 -0.0062 -0.0182 0.2536 0.1401 0.0677 0.0750 0.0002 0.0628 0.1486 0.2442 0.0763 0.0668 0.0524 0.0003 0.2363 0.1445 0.0631 0.0831 0.0002 0.0832 0.0368 0.1889 0.7172 0.6194 0.2467 0.0717 0.5923
NA K03486 0.0000 0.8070 2.7685 17.5988 15.9824 68.6303 17.1748 0.8246 16.1217 0.8140 14.4540 0.7647 log-Gaussian 0.7372 NA -0.1517 0.0380 0.0001 -0.0263 -0.1695 0.0005 0.0109 -0.1760 -0.1219 0.0514 0.0569 0.0007 0.0332 -0.1806 -0.1295 0.0579 0.0507 0.0020 0.0111 -0.2099 -0.0510 0.0474 0.0624 0.0000 0.4145 0.0368 0.2861 0.6943 0.3791 0.4473 0.0459 0.8220
NA K06902 0.0000 1.0000 10.8021 55.6987 54.0527 102.9490 55.3348 1.0000 55.5366 1.0000 52.0884 1.0000 log-Gaussian 0.6504 NA -0.1210 0.0289 0.0000 0.0384 -0.0360 -0.0034 0.0269 -0.1369 -0.1018 0.0390 0.0431 0.0005 0.0190 -0.1043 -0.1339 0.0440 0.0385 0.0185 0.0006 -0.1445 -0.0805 0.0362 0.0477 0.0001 0.0928 0.0368 0.2513 0.6846 0.4714 0.2934 0.0533 0.6015
NA K06978 0.0000 0.9152 4.9190 19.0248 17.4046 50.0675 18.4287 0.9240 17.7309 0.9419 15.7752 0.8706 Gaussian 0.7449 NA -2.3375 0.5937 0.0001 -0.1269 -0.5987 -0.1080 0.3158 -2.4016 -2.2586 0.8021 0.8881 0.0030 0.0116 -1.9224 -2.6559 0.9036 0.7912 0.0343 0.0009 -2.8158 -1.5092 0.7467 0.9828 0.0002 0.1259 0.0368 0.4183 0.6582 0.5499 0.3224 0.0705 0.6261
NA K10672 0.0000 0.9591 4.7057 26.3704 23.5292 97.7524 24.0854 0.9591 23.2503 0.9535 21.3362 0.9647 log-Gaussian 0.7270 NA -0.1487 0.0374 0.0001 0.0241 -0.0274 0.0063 0.0311 -0.1761 -0.1151 0.0506 0.0561 0.0006 0.0411 -0.2058 -0.1048 0.0568 0.0498 0.0004 0.0361 -0.2017 -0.0570 0.0466 0.0614 0.0000 0.3539 0.0368 0.2573 0.7130 0.2467 0.5619 0.0459 0.7899
Pyruvate K13497 0.0000 0.6959 0.4710 14.4480 12.9618 75.9723 11.8396 0.6433 13.0961 0.6512 14.8469 0.8471 log-Gaussian 0.6359 NA 0.1897 0.0447 0.0000 0.0649 0.0114 0.0032 -0.0265 0.2643 0.0982 0.0601 0.0666 0.0000 0.1412 0.1276 0.2374 0.0679 0.0595 0.0613 0.0001 0.2307 0.1189 0.0561 0.0739 0.0001 0.1087 0.0368 0.0353 0.7486 0.6453 0.2064 0.0467 0.6144
NA K16509 0.0058 0.2398 0.0000 8.9212 5.9768 110.6230 6.5589 0.2573 6.2056 0.2558 5.2495 0.1882 log-Gaussian 0.7187 NA -0.2191 0.0556 0.0001 0.1461 -0.1452 -0.0014 0.0145 -0.2482 -0.1835 0.0752 0.0833 0.0011 0.0285 -0.3239 -0.1388 0.0842 0.0737 0.0002 0.0608 -0.2315 -0.1984 0.0695 0.0914 0.0010 0.0310 0.0368 0.3059 0.6849 0.2393 0.6432 0.1512 0.4652
NA K17810 0.0000 0.9942 5.2468 35.1354 33.9057 85.0960 35.2136 1.0000 32.9209 0.9884 31.1843 0.9882 log-Gaussian 0.6591 NA -0.1475 0.0352 0.0000 0.0058 0.0940 -0.0023 0.0202 -0.1739 -0.1153 0.0475 0.0526 0.0003 0.0291 -0.1688 -0.1312 0.0535 0.0469 0.0018 0.0055 -0.1730 -0.1034 0.0442 0.0582 0.0001 0.0769 0.0368 0.2091 0.6849 0.3776 0.4072 0.0562 0.5816
NA K19157 0.0000 1.0000 60.4201 230.0660 216.4270 893.0340 220.8300 1.0000 219.2595 1.0000 204.2390 1.0000 Gaussian 0.7899 NA -27.1564 6.5238 0.0000 1.4038 25.1763 -0.0575 -2.6140 -24.5854 -30.3513 8.7949 9.7374 0.0056 0.0020 -21.8393 -31.2335 9.9199 8.6860 0.0286 0.0004 -32.4534 -17.9670 8.2015 10.7950 0.0001 0.0973 0.0368 0.5799 0.3597 0.5254 0.2467 0.0543 0.6040
NA K19510 0.0000 0.9766 4.7364 32.7387 30.4487 81.2096 31.5631 0.9883 30.7407 0.9651 26.5509 0.9647 log-Gaussian 0.7379 NA -0.1379 0.0334 0.0000 0.0396 -0.1268 -0.0075 0.0521 -0.1481 -0.1254 0.0452 0.0501 0.0012 0.0129 -0.1671 -0.1155 0.0508 0.0445 0.0011 0.0100 -0.2106 -0.0121 0.0413 0.0544 0.0000 0.8238 0.0368 0.3059 0.6698 0.3278 0.4353 0.0074 0.9653
NA K19956 0.0000 0.9883 7.3330 34.9770 31.4204 135.0230 29.1937 0.9942 31.1940 0.9884 36.8261 0.9765 log-Gaussian 0.6027 NA 0.1809 0.0455 0.0001 0.0623 -0.1212 -0.0022 -0.0159 0.2738 0.0671 0.0610 0.0676 0.0000 0.3218 0.1796 0.1819 0.0694 0.0608 0.0101 0.0030 0.2114 0.1281 0.0573 0.0754 0.0003 0.0906 0.0368 0.0353 0.7918 0.4362 0.4004 0.0764 0.6015
NA K17202 0.0000 0.2865 0.7368 8.6046 7.3109 32.7252 6.4956 0.2456 7.1767 0.2558 8.4218 0.4000 Gaussian 0.6951 NA 1.7457 0.4463 0.0001 0.0239 -0.0232 0.0071 0.0849 1.8970 1.5602 0.6042 0.6690 0.0019 0.0204 1.0953 2.2444 0.6771 0.5929 0.1070 0.0002 0.9629 3.1014 0.5561 0.7320 0.0846 0.0000 0.0383 0.3501 0.6846 0.7344 0.2467 0.5949 0.0459
NA K17204 0.0000 0.2281 0.7210 7.7044 6.5511 28.8417 6.0322 0.1930 6.6808 0.2093 7.7010 0.3176 log-Gaussian 0.7032 NA 0.1641 0.0421 0.0001 -0.0031 0.0375 -0.0019 0.0137 0.2070 0.1117 0.0568 0.0629 0.0003 0.0770 0.0890 0.2217 0.0638 0.0558 0.1638 0.0001 0.1105 0.2569 0.0527 0.0693 0.0370 0.0003 0.0383 0.2091 0.7223 0.7947 0.2064 0.4864 0.0759
NA K01478 0.0000 0.9152 2.8968 22.3548 19.6443 77.0576 20.6079 0.9474 19.8361 0.9070 18.0370 0.8588 log-Gaussian 0.7371 NA -0.1465 0.0379 0.0001 0.0244 0.0536 0.0049 0.0232 -0.1146 -0.1857 0.0512 0.0566 0.0259 0.0012 -0.1715 -0.1273 0.0577 0.0505 0.0032 0.0123 -0.1926 -0.0668 0.0473 0.0623 0.0001 0.2845 0.0389 0.6849 0.3059 0.4004 0.4512 0.0467 0.7549
NA K02781 0.0000 0.8977 3.2330 28.8319 25.3120 107.0630 24.4052 0.8947 24.5902 0.8721 28.8944 0.9294 log-Gaussian 0.6018 NA 0.2088 0.0540 0.0001 0.0463 -0.2021 -0.0082 -0.0093 0.2483 0.1605 0.0730 0.0808 0.0008 0.0481 0.1457 0.2573 0.0820 0.0718 0.0766 0.0004 0.2515 0.1345 0.0677 0.0891 0.0002 0.1321 0.0389 0.2861 0.7149 0.6806 0.2467 0.0736 0.6330
NA K02760 0.0000 1.0000 16.5141 79.9727 73.4233 248.7300 77.5212 1.0000 68.4372 1.0000 65.1466 1.0000 log-Gaussian 0.7162 NA -0.1414 0.0374 0.0002 0.0743 -0.1988 -0.0043 0.0258 -0.1434 -0.1392 0.0507 0.0561 0.0050 0.0137 -0.1765 -0.1145 0.0569 0.0498 0.0021 0.0223 -0.1761 -0.0817 0.0469 0.0617 0.0002 0.1864 0.0492 0.5609 0.6698 0.3817 0.4855 0.0714 0.6849
NA K07393 0.0029 0.9298 0.0000 25.6790 24.3219 67.5453 24.8859 0.9415 24.8432 0.9302 21.4166 0.9059 Gaussian 0.7107 NA -3.6654 0.9690 0.0002 0.4776 -2.5730 -0.0216 0.8780 -4.0816 -3.1510 1.3092 1.4496 0.0020 0.0306 -3.9318 -3.4614 1.4758 1.2924 0.0082 0.0079 -4.1480 -2.8371 1.2156 1.6000 0.0007 0.0774 0.0492 0.3597 0.6849 0.4214 0.4214 0.1299 0.5818
NA K12528 0.0000 0.9766 4.5560 27.8176 26.5937 67.0434 27.9343 0.9825 26.8963 0.9651 23.2623 0.9765 Gaussian 0.7416 NA -3.0979 0.8231 0.0002 1.5922 -4.5045 -0.1591 0.8118 -3.7212 -2.3350 1.1131 1.2325 0.0009 0.0593 -2.4212 -3.6171 1.2527 1.0970 0.0543 0.0011 -3.5124 -2.3860 1.0312 1.3574 0.0008 0.0799 0.0500 0.3059 0.7166 0.6261 0.3278 0.1308 0.5857
NA K18692 0.0000 0.1053 0.2272 5.1297 3.5737 47.4198 3.6423 0.1287 3.4486 0.1047 3.3714 0.0588 log-Gaussian 0.7765 NA -0.1639 0.0437 0.0002 0.1157 -0.1328 -0.0009 0.0203 -0.1753 -0.1498 0.0592 0.0655 0.0033 0.0230 -0.2708 -0.0819 0.0659 0.0577 0.0001 0.1569 -0.1966 -0.1075 0.0546 0.0719 0.0004 0.1359 0.0505 0.4366 0.6849 0.2064 0.7893 0.0923 0.6349
NA K02245 0.0117 0.1228 0.0000 5.3248 3.5439 72.9346 3.7430 0.1404 3.3975 0.1279 3.2111 0.0824 log-Gaussian 0.7796 NA -0.1757 0.0471 0.0002 0.1096 -0.1423 -0.0010 0.0250 -0.2065 -0.1379 0.0637 0.0705 0.0013 0.0514 -0.2548 -0.1151 0.0714 0.0625 0.0004 0.0667 -0.2320 -0.0786 0.0588 0.0774 0.0001 0.3109 0.0519 0.3101 0.7149 0.2467 0.6559 0.0543 0.7678
NA K00009 0.0000 0.9766 5.2603 31.6988 28.6944 115.4810 27.5855 0.9708 28.3442 1.0000 31.7631 0.9647 Gaussian 0.6133 NA 5.4678 1.4810 0.0003 0.6309 -5.3788 -0.1301 -0.5970 8.1256 2.2125 1.9893 2.2028 0.0001 0.3161 5.3247 5.5778 2.2563 1.9761 0.0190 0.0051 7.3021 2.2959 1.8531 2.4391 0.0001 0.3474 0.0552 0.0930 0.7909 0.4766 0.4072 0.0543 0.7860
NA K01042 0.0000 0.9064 2.1992 20.2673 18.8658 67.5662 19.0928 0.9357 19.7120 0.8953 16.4124 0.8588 log-Gaussian 0.7483 NA -0.1266 0.0343 0.0003 0.0268 -0.0267 0.0038 0.0331 -0.0906 -0.1706 0.0463 0.0512 0.0514 0.0010 -0.1553 -0.1046 0.0522 0.0457 0.0032 0.0229 -0.1791 -0.0357 0.0427 0.0562 0.0000 0.5254 0.0552 0.7149 0.3059 0.4004 0.4912 0.0460 0.8794
NA K01497 0.0000 0.3889 1.1883 9.5502 8.6160 34.5050 8.0224 0.3392 8.8291 0.4070 9.5833 0.4706 log-Gaussian 0.6102 NA 0.1697 0.0461 0.0003 0.0357 -0.0903 -0.0058 0.0106 0.2304 0.0952 0.0620 0.0687 0.0002 0.1671 0.1370 0.1948 0.0701 0.0614 0.0518 0.0017 0.1806 0.1506 0.0580 0.0764 0.0021 0.0497 0.0552 0.1982 0.7544 0.6165 0.3697 0.2051 0.5332
NA K19509 0.0000 0.8626 1.4455 21.5168 18.5855 79.5557 19.7792 0.8655 18.9192 0.8837 16.5391 0.8353 log-Gaussian 0.8189 NA -0.1369 0.0372 0.0003 0.1039 -0.3228 -0.0088 0.0332 -0.1814 -0.0825 0.0502 0.0555 0.0004 0.1388 -0.1589 -0.1200 0.0566 0.0496 0.0054 0.0163 -0.2049 -0.0191 0.0464 0.0611 0.0000 0.7545 0.0552 0.2091 0.7486 0.4072 0.4584 0.0459 0.9549
NA K01299 0.0000 0.9766 4.2924 30.4604 28.8591 61.5233 29.9912 0.9883 28.8573 0.9651 26.7801 0.9647 log-Gaussian 0.6838 NA -0.1172 0.0321 0.0003 0.0208 0.0842 0.0031 0.0316 -0.1374 -0.0928 0.0433 0.0479 0.0017 0.0540 -0.1622 -0.0828 0.0488 0.0427 0.0010 0.0539 -0.1571 -0.0481 0.0403 0.0530 0.0001 0.3654 0.0573 0.3383 0.7149 0.3224 0.6261 0.0562 0.7970
NA K01914 0.0000 1.0000 36.6096 108.3381 109.3120 164.6550 110.9380 1.0000 108.7070 1.0000 103.5820 1.0000 Gaussian 0.5573 NA -5.5959 1.5386 0.0003 0.5691 -0.4816 0.1323 1.1169 -7.0257 -3.8445 2.0788 2.3021 0.0008 0.0961 -5.3836 -5.7574 2.3284 2.0395 0.0215 0.0051 -7.9766 -1.4786 1.9190 2.5259 0.0000 0.5588 0.0573 0.2893 0.7321 0.4852 0.4072 0.0467 0.8886
NA K06890 0.0000 0.9942 7.2313 39.4151 37.2706 99.5786 39.6014 0.9942 34.7195 1.0000 36.4245 0.9882 log-Gaussian 0.7294 NA -0.1238 0.0340 0.0003 0.0134 0.0056 0.0008 0.0182 -0.1759 -0.0602 0.0456 0.0505 0.0001 0.2344 -0.1281 -0.1205 0.0516 0.0452 0.0137 0.0081 -0.1265 -0.1193 0.0428 0.0563 0.0034 0.0350 0.0573 0.1348 0.7667 0.4518 0.4214 0.2728 0.4819
NA K00030 0.0000 0.9152 1.0457 21.1786 20.1527 56.5064 20.9401 0.9415 19.3735 0.9070 18.7931 0.8706 log-Gaussian 0.6722 NA -0.1373 0.0378 0.0003 0.0380 -0.0419 -0.0018 0.0306 -0.1367 -0.1382 0.0512 0.0567 0.0081 0.0155 -0.1469 -0.1299 0.0576 0.0504 0.0113 0.0105 -0.1685 -0.0833 0.0475 0.0626 0.0005 0.1843 0.0577 0.6399 0.6698 0.4476 0.4425 0.1038 0.6828
NA K06971 0.0029 0.3655 0.0000 9.8625 8.3033 33.2740 9.0905 0.4035 8.1373 0.3488 7.4715 0.3059 log-Gaussian 0.6705 NA -0.1488 0.0411 0.0004 -0.0010 0.0088 -0.0007 0.0209 -0.2277 -0.0523 0.0552 0.0611 0.0001 0.3929 -0.1784 -0.1262 0.0626 0.0548 0.0047 0.0221 -0.1869 -0.0827 0.0516 0.0679 0.0003 0.2243 0.0583 0.0930 0.8160 0.4072 0.4852 0.0897 0.7085
NA K23121 0.0000 0.2544 0.9395 8.4897 7.7824 25.2666 7.9853 0.2865 7.8147 0.2674 7.2420 0.1765 log-Gaussian 0.5815 NA -0.1323 0.0367 0.0004 -0.0271 0.0268 -0.0008 0.0243 -0.1345 -0.1294 0.0493 0.0546 0.0069 0.0186 -0.1535 -0.1160 0.0558 0.0489 0.0064 0.0184 -0.1783 -0.0524 0.0459 0.0604 0.0001 0.3868 0.0587 0.6063 0.6846 0.4072 0.4710 0.0577 0.8100
NA K02745 0.0029 0.4006 0.0000 9.7838 8.2668 53.0323 8.7820 0.4327 7.7779 0.3953 7.4664 0.3412 log-Gaussian 0.7518 NA -0.1820 0.0512 0.0005 0.0749 -0.1737 0.0026 0.0638 -0.2128 -0.1445 0.0692 0.0766 0.0023 0.0604 -0.2391 -0.1383 0.0779 0.0682 0.0024 0.0437 -0.1850 -0.1772 0.0645 0.0849 0.0045 0.0380 0.0698 0.3815 0.7172 0.3993 0.5861 0.3228 0.4906
NA K00020 0.0000 0.9854 4.0023 25.2860 23.8846 71.0569 24.9625 1.0000 22.6221 0.9767 23.1333 0.9647 log-Gaussian 0.6701 NA -0.1109 0.0317 0.0005 0.0157 -0.0858 0.0021 0.0312 -0.0817 -0.1466 0.0428 0.0474 0.0575 0.0022 -0.0996 -0.1195 0.0483 0.0423 0.0400 0.0051 -0.1495 -0.0439 0.0397 0.0522 0.0002 0.4010 0.0805 0.7149 0.3778 0.5716 0.4072 0.0705 0.8158
NA K08177 0.0000 1.0000 12.1593 41.1144 39.9032 85.0838 41.9585 1.0000 40.3902 1.0000 38.2433 1.0000 Gaussian 0.6433 NA -3.7143 1.0615 0.0005 1.3324 1.1857 -0.1371 0.5736 -3.0444 -4.5395 1.4339 1.5878 0.0347 0.0046 -5.1183 -2.6375 1.6114 1.4112 0.0017 0.0627 -4.6287 -2.1343 1.3326 1.7541 0.0006 0.2248 0.0805 0.7021 0.5460 0.3697 0.6485 0.1152 0.7085
NA K06180 0.0000 1.0000 64.9669 215.6466 215.9980 274.0710 218.0880 1.0000 218.3885 1.0000 212.1470 1.0000 Gaussian 0.4837 NA -9.0821 2.6094 0.0006 3.0099 2.1668 0.3503 1.4619 -8.6135 -9.6698 3.5152 3.8931 0.0149 0.0136 -15.2439 -4.3540 3.9419 3.4531 0.0001 0.2084 -9.7075 -8.0176 3.2775 4.3142 0.0033 0.0642 0.0814 0.6698 0.6698 0.2393 0.8171 0.2720 0.5646
NA K07029 0.0000 1.0000 67.6451 239.9732 239.9145 327.9390 240.3360 1.0000 241.4220 1.0000 238.9860 1.0000 Gaussian 0.5379 NA -11.2976 3.2537 0.0006 1.8469 -0.2456 0.1686 2.1968 -10.1771 -12.6772 4.3936 4.8657 0.0213 0.0097 -15.4679 -8.0966 4.9293 4.3176 0.0019 0.0618 -13.7588 -7.0574 4.0774 5.3670 0.0008 0.1896 0.0814 0.6846 0.6399 0.3776 0.6455 0.1410 0.6878
NA K10794 0.0000 0.1871 0.9772 7.5465 6.8353 30.1187 7.0050 0.2047 6.9130 0.2093 6.1879 0.1294 Gaussian 0.6919 NA -1.1920 0.3426 0.0006 0.0144 0.3579 0.0454 0.3652 -1.1338 -1.2639 0.4637 0.5135 0.0151 0.0145 -1.8034 -0.7232 0.5196 0.4550 0.0006 0.1132 -1.1991 -1.1798 0.4319 0.5685 0.0059 0.0389 0.0814 0.6698 0.6698 0.2934 0.7452 0.3325 0.4937
NA K01281 0.0000 0.2573 1.4890 8.5402 7.2181 61.0040 7.5921 0.2865 7.2353 0.2442 6.0575 0.2118 log-Gaussian 0.6935 NA -0.1349 0.0391 0.0007 0.0194 0.0187 0.0050 0.0182 -0.1731 -0.0881 0.0528 0.0585 0.0012 0.1331 -0.1831 -0.0980 0.0594 0.0520 0.0023 0.0607 -0.1623 -0.0879 0.0488 0.0643 0.0010 0.1729 0.0834 0.3059 0.7486 0.3946 0.6432 0.1526 0.6736
NA K07251 0.0000 0.3626 0.9618 9.5772 7.9222 63.5507 8.3308 0.3918 8.2335 0.3721 6.6854 0.2941 log-Gaussian 0.7692 NA -0.1494 0.0433 0.0007 0.0573 -0.0535 -0.0088 0.0276 -0.1221 -0.1831 0.0585 0.0647 0.0378 0.0050 -0.1197 -0.1722 0.0660 0.0578 0.0707 0.0032 -0.2212 -0.0251 0.0539 0.0710 0.0001 0.7238 0.0834 0.7064 0.5609 0.6667 0.4004 0.0467 0.9444
NA K07722 0.0000 0.6023 1.5045 12.5528 11.1472 64.9527 11.8575 0.6257 11.8009 0.6512 10.1834 0.5059 log-Gaussian 0.7315 NA -0.1416 0.0410 0.0006 0.0174 0.0456 0.0083 0.0133 -0.1223 -0.1654 0.0554 0.0614 0.0283 0.0075 -0.1412 -0.1419 0.0624 0.0547 0.0246 0.0100 -0.1962 -0.0471 0.0513 0.0675 0.0002 0.4861 0.0834 0.6849 0.6209 0.5089 0.4353 0.0638 0.8607
NA K01258 0.0000 1.0000 38.2694 159.8186 160.0600 245.8310 161.6090 1.0000 160.6395 1.0000 154.2200 1.0000 Gaussian 0.5547 NA -8.0070 2.3449 0.0007 3.5741 -6.7356 0.0962 1.5112 -8.2606 -7.7011 3.1748 3.5159 0.0098 0.0293 -8.8966 -7.3258 3.5717 3.1285 0.0133 0.0199 -10.2960 -4.0556 2.9457 3.8773 0.0006 0.2965 0.0836 0.6399 0.6849 0.4518 0.4832 0.1119 0.7613
NA K01259 0.0029 0.4035 0.0000 11.2897 7.9165 57.1981 7.8159 0.4152 9.1865 0.4419 7.2522 0.3412 log-Gaussian 0.8378 NA -0.1417 0.0414 0.0007 0.0153 -0.2142 0.0047 0.0322 -0.1202 -0.1679 0.0561 0.0621 0.0330 0.0073 -0.1541 -0.1322 0.0631 0.0553 0.0153 0.0175 -0.1489 -0.1291 0.0522 0.0688 0.0047 0.0615 0.0836 0.6943 0.6158 0.4544 0.4666 0.3228 0.5634
NA K04651 0.0000 1.0000 18.7232 76.5684 71.9150 222.9510 74.1096 1.0000 71.9150 1.0000 67.3661 1.0000 log-Gaussian 0.7074 NA -0.1062 0.0310 0.0007 0.0187 0.1403 0.0076 0.0241 -0.0844 -0.1331 0.0419 0.0464 0.0452 0.0045 -0.1500 -0.0727 0.0471 0.0413 0.0016 0.0795 -0.1265 -0.0712 0.0391 0.0514 0.0014 0.1676 0.0836 0.7130 0.5460 0.3697 0.6864 0.1753 0.6696
NA K09963 0.0000 0.4240 0.6694 10.6018 8.8961 47.1530 9.0755 0.4386 9.2760 0.4651 8.1133 0.3529 log-Gaussian 0.7256 NA -0.1476 0.0429 0.0007 0.0565 -0.1545 -0.0050 0.0208 -0.1724 -0.1175 0.0580 0.0642 0.0032 0.0682 -0.2116 -0.0986 0.0652 0.0571 0.0013 0.0852 -0.2007 -0.0559 0.0537 0.0707 0.0002 0.4305 0.0836 0.4321 0.7222 0.3485 0.6965 0.0717 0.8334
NA K19267 0.0029 0.3684 0.0000 9.5612 7.8405 47.0339 8.3720 0.3860 7.2988 0.3488 6.3760 0.3529 log-Gaussian 0.7526 NA -0.1586 0.0462 0.0007 0.0947 0.0104 -0.0074 0.0229 -0.0988 -0.2319 0.0624 0.0690 0.1145 0.0009 -0.1280 -0.1821 0.0704 0.0617 0.0703 0.0034 -0.2470 -0.0059 0.0570 0.0751 0.0000 0.9369 0.0836 0.7449 0.3047 0.6644 0.4004 0.0459 0.9885
NA K05346 0.0000 0.9561 1.2041 32.0852 29.0099 95.7052 27.7434 0.9532 26.5915 0.9419 32.9738 0.9765 log-Gaussian 0.6799 NA 0.1634 0.0481 0.0008 0.0186 -0.1152 -0.0080 -0.0029 0.2148 0.1003 0.0649 0.0719 0.0011 0.1639 0.1318 0.1876 0.0733 0.0642 0.0732 0.0038 0.1607 0.1678 0.0606 0.0797 0.0085 0.0362 0.0849 0.3059 0.7530 0.6759 0.4061 0.3722 0.4844
NA K10796 0.0029 0.1140 0.0000 6.3026 5.9366 21.5495 6.0836 0.1228 6.0554 0.1512 5.2583 0.0588 log-Gaussian 0.6475 NA -0.1362 0.0401 0.0008 -0.0214 -0.0017 0.0055 0.0337 -0.1204 -0.1554 0.0542 0.0601 0.0273 0.0102 -0.1983 -0.0885 0.0608 0.0533 0.0013 0.0979 -0.1772 -0.0652 0.0503 0.0662 0.0005 0.3259 0.0849 0.6849 0.6399 0.3433 0.7217 0.1076 0.7751
NA K18890 0.0000 0.9678 0.6334 25.4065 23.4384 58.9354 24.7482 0.9883 23.0303 0.9651 22.8075 0.9294 log-Gaussian 0.7226 NA -0.1203 0.0353 0.0008 0.0348 0.1346 -0.0063 0.0296 -0.1268 -0.1122 0.0478 0.0530 0.0085 0.0350 -0.1341 -0.1097 0.0538 0.0471 0.0133 0.0207 -0.1460 -0.0758 0.0444 0.0584 0.0011 0.1952 0.0849 0.6399 0.7034 0.4518 0.4836 0.1620 0.6919
NA K02082 0.0000 0.4708 0.4768 10.9479 9.3409 45.3290 9.6755 0.4912 9.4341 0.4884 8.5374 0.4118 log-Gaussian 0.7714 NA -0.1594 0.0472 0.0008 0.0475 -0.0552 -0.0007 0.0583 -0.1555 -0.1645 0.0637 0.0706 0.0153 0.0205 -0.1840 -0.1406 0.0718 0.0629 0.0110 0.0262 -0.1850 -0.1152 0.0594 0.0782 0.0020 0.1418 0.0860 0.6698 0.6846 0.4473 0.5210 0.2051 0.6483
NA K02769 0.0029 0.7632 0.0000 20.7022 18.0266 81.5632 16.3253 0.7661 18.1801 0.7442 22.1653 0.7765 log-Gaussian 0.7197 NA 0.1921 0.0568 0.0008 0.1095 -0.1216 -0.0073 -0.0183 0.2389 0.1343 0.0765 0.0847 0.0020 0.1140 0.1526 0.2223 0.0864 0.0756 0.0785 0.0036 0.2227 0.1390 0.0715 0.0941 0.0021 0.1409 0.0860 0.3597 0.7433 0.6843 0.4055 0.2051 0.6462
NA K00068 0.0000 0.8626 4.3246 19.4311 17.5620 64.9188 16.5446 0.8363 17.5824 0.8721 20.1357 0.9059 log-Gaussian 0.6073 NA 0.1466 0.0437 0.0009 0.0449 -0.0876 -0.0012 -0.0070 0.2305 0.0437 0.0586 0.0649 0.0001 0.5014 0.1088 0.1755 0.0664 0.0581 0.1023 0.0028 0.1582 0.1262 0.0550 0.0724 0.0044 0.0826 0.0888 0.1195 0.8581 0.7252 0.4004 0.3202 0.5904
NA K03833 0.0000 0.8830 2.4043 18.4745 17.1400 52.2558 17.5496 0.9123 17.7206 0.8721 16.2770 0.8353 log-Gaussian 0.7519 NA -0.1085 0.0322 0.0009 0.0223 -0.0484 0.0028 0.0345 -0.0730 -0.1519 0.0435 0.0482 0.0945 0.0018 -0.1332 -0.0895 0.0491 0.0430 0.0071 0.0382 -0.1507 -0.0355 0.0403 0.0530 0.0002 0.5030 0.0888 0.7321 0.3414 0.4171 0.5716 0.0717 0.8708
NA K21577 0.0000 0.1053 1.2826 6.4421 5.9219 20.3011 6.1947 0.1228 6.0014 0.1163 5.1688 0.0588 log-Gaussian 0.7194 NA -0.1095 0.0326 0.0009 -0.0225 -0.0160 0.0038 0.0274 -0.0954 -0.1266 0.0441 0.0488 0.0314 0.0101 -0.1334 -0.0912 0.0496 0.0435 0.0077 0.0368 -0.1566 -0.0280 0.0408 0.0537 0.0002 0.6020 0.0888 0.6882 0.6399 0.4214 0.5643 0.0638 0.9044
NA K01304 0.0000 0.9942 8.6652 45.6939 44.1047 111.1400 45.1066 1.0000 45.7141 0.9884 39.4443 0.9882 log-Gaussian 0.5747 NA -0.1146 0.0342 0.0009 0.0392 0.0517 -0.0005 0.0312 -0.1461 -0.0762 0.0460 0.0509 0.0016 0.1357 -0.1403 -0.0949 0.0521 0.0456 0.0076 0.0387 -0.1260 -0.0950 0.0430 0.0566 0.0036 0.0944 0.0897 0.3383 0.7486 0.4186 0.5716 0.2862 0.6040
NA K01239 0.0000 1.0000 21.7262 91.8180 87.4888 191.2760 91.2652 1.0000 86.6266 1.0000 82.8117 1.0000 log-Gaussian 0.5037 NA -0.0965 0.0289 0.0010 0.0147 0.0243 -0.0015 0.0142 -0.0903 -0.1042 0.0390 0.0432 0.0215 0.0166 -0.0948 -0.0977 0.0440 0.0386 0.0322 0.0118 -0.1071 -0.0783 0.0363 0.0478 0.0035 0.1029 0.0898 0.6846 0.6698 0.5355 0.4476 0.2769 0.6093
NA K02526 0.0000 0.6579 2.4754 14.9242 12.7851 52.0248 13.3288 0.6842 12.6246 0.6047 12.1132 0.6588 log-Gaussian 0.7785 NA -0.1294 0.0387 0.0010 0.0116 -0.0445 -0.0023 0.0533 -0.1152 -0.1470 0.0523 0.0579 0.0284 0.0117 -0.1131 -0.1419 0.0590 0.0516 0.0561 0.0064 -0.1472 -0.0987 0.0487 0.0641 0.0028 0.1250 0.0898 0.6849 0.6582 0.6324 0.4072 0.2353 0.6253
NA K01079 0.0000 0.8918 0.8281 24.4933 21.5427 81.1856 23.2068 0.9064 22.1598 0.9070 19.5643 0.8471 log-Gaussian 0.6907 NA -0.1527 0.0460 0.0010 -0.0085 0.0568 -0.0018 0.0390 -0.1856 -0.1126 0.0620 0.0686 0.0030 0.1020 -0.1392 -0.1631 0.0700 0.0613 0.0479 0.0083 -0.1850 -0.0970 0.0577 0.0760 0.0015 0.2032 0.0903 0.4183 0.7322 0.5978 0.4214 0.1845 0.6955
NA K01142 0.0000 1.0000 42.0330 152.8160 153.3920 205.2690 154.6360 1.0000 154.6825 1.0000 149.4610 1.0000 Gaussian 0.5047 NA -6.9164 2.0826 0.0010 1.7029 2.1126 0.3183 1.0194 -7.7870 -5.8544 2.8149 3.1174 0.0060 0.0614 -10.4908 -4.1731 3.1570 2.7655 0.0010 0.1324 -6.4255 -7.7733 2.6230 3.4526 0.0149 0.0251 0.0903 0.5964 0.7172 0.3224 0.7705 0.4063 0.4553
NA K10563 0.0000 0.6228 2.0462 14.1461 12.0160 63.7619 12.6007 0.6608 11.8714 0.6279 10.6932 0.5412 log-Gaussian 0.7686 NA -0.1329 0.0400 0.0010 0.0927 -0.1134 0.0061 0.0281 -0.0928 -0.1817 0.0537 0.0595 0.0853 0.0025 -0.2158 -0.0694 0.0604 0.0529 0.0004 0.1910 -0.1552 -0.0948 0.0498 0.0655 0.0020 0.1492 0.0903 0.7239 0.3982 0.2467 0.8093 0.2051 0.6548
NA K01200 0.0000 1.0000 17.4383 71.9868 68.7343 210.7560 69.7969 1.0000 67.2816 1.0000 69.5399 1.0000 log-Gaussian 0.6383 NA -0.1188 0.0363 0.0012 0.0218 -0.0321 -0.0046 0.0071 -0.1883 -0.0340 0.0483 0.0535 0.0001 0.5256 -0.1378 -0.1043 0.0553 0.0484 0.0133 0.0320 -0.1275 -0.1039 0.0457 0.0601 0.0056 0.0851 0.0953 0.1248 0.8687 0.4518 0.5337 0.3266 0.5949
NA K01620 0.0000 1.0000 37.7395 134.9037 135.8880 187.6610 136.8110 1.0000 135.5140 1.0000 134.7930 1.0000 Gaussian 0.5675 NA -5.8977 1.7999 0.0012 2.5086 -4.1186 -0.1261 1.3453 -6.3927 -5.2951 2.4351 2.6966 0.0091 0.0506 -5.0639 -6.5373 2.7377 2.3980 0.0654 0.0068 -7.8734 -2.4800 2.2575 2.9715 0.0006 0.4047 0.0953 0.6399 0.7149 0.6559 0.4171 0.1130 0.8182
NA K02759 0.0000 0.9971 9.1115 55.6837 48.9851 188.0850 52.1957 1.0000 46.6576 1.0000 45.5281 0.9882 Gaussian 0.7483 NA -6.9191 2.1199 0.0012 4.2451 -7.9718 -0.1818 2.4292 -6.9749 -6.8705 2.8539 3.1599 0.0152 0.0306 -7.8244 -6.2256 3.2292 2.8277 0.0161 0.0286 -7.5266 -5.8777 2.6646 3.5072 0.0051 0.0950 0.0953 0.6698 0.6849 0.4557 0.5254 0.3266 0.6040
NA K02773 0.0000 0.9240 2.1438 31.4024 27.3372 115.6650 25.9497 0.9123 27.4092 0.9419 29.3330 0.9294 log-Gaussian 0.6620 NA 0.1794 0.0550 0.0012 0.0568 -0.2374 -0.0052 -0.0112 0.2649 0.0744 0.0740 0.0819 0.0004 0.3643 0.1550 0.1981 0.0837 0.0733 0.0653 0.0073 0.2165 0.1151 0.0692 0.0911 0.0019 0.2076 0.0953 0.2157 0.8071 0.6559 0.4171 0.2051 0.6994
NA K05813 0.0000 0.9298 0.2910 22.0576 20.8965 74.6228 21.5483 0.9474 22.0428 0.9070 17.7679 0.9176 Gaussian 0.7585 NA -2.5400 0.7731 0.0012 -0.1577 -0.4793 -0.0865 0.9991 -2.2420 -2.9084 1.0440 1.1559 0.0327 0.0125 -2.6175 -2.4804 1.1768 1.0305 0.0270 0.0168 -3.0950 -1.5762 0.9724 1.2800 0.0016 0.2193 0.0953 0.6943 0.6698 0.5210 0.4602 0.1920 0.7067
NA K05967 0.0000 0.8187 1.5082 17.4080 15.5252 52.6908 14.2290 0.7895 15.2921 0.8140 17.6809 0.8824 log-Gaussian 0.5191 NA 0.1658 0.0508 0.0012 0.0426 -0.0877 0.0014 -0.0196 0.2979 0.0036 0.0676 0.0749 0.0000 0.9616 0.2027 0.1375 0.0772 0.0676 0.0091 0.0430 0.2099 0.0893 0.0638 0.0840 0.0011 0.2886 0.0953 0.0353 0.9959 0.4351 0.5816 0.1620 0.7581
NA K07473 0.0000 1.0000 89.8945 383.3194 373.5610 1047.4800 385.1940 1.0000 365.1540 1.0000 369.5050 1.0000 Gaussian 0.7561 NA -25.2270 7.7028 0.0012 12.4225 25.5730 0.0670 1.0102 -17.2517 -35.0419 10.3747 11.4868 0.0976 0.0025 -12.0463 -35.3349 11.6740 10.2222 0.3031 0.0006 -32.6436 -12.4027 9.6690 12.7267 0.0008 0.3307 0.0953 0.7321 0.3982 0.8456 0.2937 0.1410 0.7771
Pyruvate K14155 0.0000 1.0000 62.2066 177.4559 174.6795 290.3540 178.3600 1.0000 171.8095 1.0000 171.8740 1.0000 Gaussian 0.5836 NA -9.4576 2.8744 0.0011 1.3248 -5.1135 -0.1308 1.8782 -12.8344 -5.3189 3.8782 4.2946 0.0011 0.2166 -12.2066 -7.3483 4.3726 3.8297 0.0056 0.0561 -8.2958 -11.4712 3.6212 4.7664 0.0227 0.0168 0.0953 0.3059 0.7660 0.4072 0.6324 0.4414 0.4153
NA K19802 0.0000 0.7515 1.5922 13.9702 12.8162 35.3218 12.9433 0.7661 14.0819 0.7558 11.9373 0.7176 log-Gaussian 0.6519 NA -0.1233 0.0377 0.0012 0.0084 0.0085 -0.0033 0.0149 -0.1349 -0.1090 0.0510 0.0565 0.0087 0.0546 -0.0997 -0.1413 0.0572 0.0501 0.0824 0.0051 -0.1675 -0.0463 0.0471 0.0620 0.0004 0.4558 0.0953 0.6399 0.7149 0.6938 0.4072 0.1038 0.8468
NA K21576 0.0000 0.1316 1.0041 6.8332 6.3030 22.8655 6.6680 0.1637 6.2896 0.1279 5.6721 0.0706 Gaussian 0.7193 NA -0.9199 0.2797 0.0011 -0.1717 -0.0263 0.0278 0.2366 -0.9223 -0.9160 0.3784 0.4190 0.0155 0.0297 -1.1736 -0.7254 0.4254 0.3725 0.0062 0.0526 -1.1708 -0.4850 0.3515 0.4626 0.0010 0.2955 0.0953 0.6698 0.6849 0.4072 0.6204 0.1512 0.7605
NA K03711 0.0000 0.9708 2.7271 30.8006 27.2751 113.6430 28.1134 0.9825 26.3340 0.9651 24.0219 0.9529 log-Gaussian 0.6716 NA -0.1306 0.0402 0.0013 0.0349 -0.0326 -0.0014 0.0433 -0.1230 -0.1402 0.0542 0.0600 0.0242 0.0203 -0.1642 -0.1048 0.0612 0.0536 0.0078 0.0518 -0.1433 -0.1084 0.0507 0.0668 0.0051 0.1056 0.0993 0.6849 0.6846 0.4214 0.6165 0.3266 0.6119
NA K10670 0.0000 0.9152 3.7061 21.0458 19.5811 69.8346 20.7008 0.9298 18.8160 0.9186 16.7254 0.8824 log-Gaussian 0.7299 NA -0.1147 0.0354 0.0013 0.0013 0.0016 0.0040 0.0283 -0.1317 -0.0937 0.0479 0.0530 0.0063 0.0780 -0.1634 -0.0773 0.0537 0.0470 0.0026 0.1014 -0.1280 -0.0919 0.0444 0.0584 0.0042 0.1168 0.0993 0.6063 0.7231 0.4004 0.7234 0.3171 0.6175

Pathway Results

Overview of the numbers of significant genes
Code
data.frame("what"  = c("No. of pathways that didn't have low abundance",
                       "No. of significant genes (fresh intervention)",
                       "No. of significant genes (past. intervention)"),
           "value" = c(nrow(dat_PWresults),
                       sum(dat_PWresults$freshInt_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_pvalue_correct < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of pathways that didn't have low abundance 100
No. of significant genes (fresh intervention) 0
No. of significant genes (past. intervention) 0

… alternatively, looking at the raw p-values, not corrected for multiple testing

Code
data.frame("what"  = c("No. of pathways that didn't have low abundance",
                       "No. of significant genes (fresh intervention)",
                       "No. of significant genes (past. intervention)"),
           "value" = c(nrow(dat_PWresults),
                       sum(dat_PWresults$freshInt_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_pvalue < 0.05, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of pathways that didn't have low abundance 100
No. of significant genes (fresh intervention) 0
No. of significant genes (past. intervention) 41
Overview which specific genes are significant (with the multiple testing correction)
Code
dat_PWresults %>% 
  filter((freshInt_pvalue_correct < 0.1) |
           (pastInt_pvalue_correct < 0.1)) %>% 
  mutate(across(contains("pvalue_correct"), function(x) { ifelse(x < 0.1, x, "not significant") })) %>% 
  dplyr::select(pathway, contains("pvalue_correct")) %>% 
  arrange(freshInt_pvalue_correct, pastInt_pvalue_correct) %>% 
  kable() %>% 
  kable_styling()
pathway freshInt_pvalue_correct pastInt_pvalue_correct freshInt_ageLow_pvalue_correct pastInt_ageLow_pvalue_correct freshInt_ageHigh_pvalue_correct pastInt_ageHigh_pvalue_correct freshInt_sexM_pvalue_correct pastInt_sexM_pvalue_correct freshInt_sexW_pvalue_correct pastInt_sexW_pvalue_correct freshInt_bmiLow_pvalue_correct pastInt_bmiLow_pvalue_correct freshInt_bmiHigh_pvalue_correct pastInt_bmiHigh_pvalue_correct
Overview which specific genes are significant (without multiple testing correction)
Code
dat_PWresults %>% 
  filter((freshInt_pvalue < 0.05) |
           (pastInt_pvalue < 0.05)) %>% 
  dplyr::select(-contains("correct")) %>% 
  mutate(across(contains("pvalue"), function(x) { ifelse(x < 0.1, x, "not significant") })) %>% 
  dplyr::select(pathway, contains("pvalue")) %>% 
  arrange(freshInt_pvalue, pastInt_pvalue) %>% 
  kable() %>% 
  kable_styling()
pathway freshInt_pvalue pastInt_pvalue freshInt_ageLow_pvalue freshInt_ageHigh_pvalue freshInt_sexM_pvalue freshInt_sexW_pvalue freshInt_bmiLow_pvalue freshInt_bmiHigh_pvalue pastInt_ageLow_pvalue pastInt_ageHigh_pvalue pastInt_sexM_pvalue pastInt_sexW_pvalue pastInt_bmiLow_pvalue pastInt_bmiHigh_pvalue
D-Glutamine and D-glutamate metabolism 0.0637 0.0020 not significant 0.0336 not significant not significant not significant not significant 0.0086 0.0844 0.0047 0.0988 0.0045 not significant
Quorum sensing 0.0944 0.0417 not significant 0.0766 not significant not significant 0.0125 not significant not significant 0.0815 0.084 not significant 0.0243 not significant
Streptomycin biosynthesis 0.0956 0.0488 not significant not significant not significant 0.0932 not significant not significant 0.0893 not significant not significant not significant 0.0287 not significant
Arabinogalactan biosynthesis - Mycobacterium not significant 0.0011 not significant not significant not significant not significant not significant not significant 0.0043 0.0856 0.0049 0.0597 0.0099 0.0446
Glyoxylate and dicarboxylate metabolism not significant 0.0031 not significant not significant not significant not significant not significant not significant 0.0126 0.0985 0.0616 0.0216 0.0019 not significant
Histidine metabolism not significant 0.0048 not significant not significant not significant not significant not significant not significant 0.0089 not significant 0.0196 0.0884 0.0059 not significant
Alanine, aspartate and glutamate metabolism not significant 0.0056 not significant not significant not significant not significant not significant not significant 0.0165 not significant 0.0289 0.0765 0.0055 not significant
Sphingolipid metabolism not significant 0.0058 not significant not significant not significant not significant not significant not significant 0.0009 not significant not significant 0.0095 0.0073 not significant
Phenylpropanoid biosynthesis not significant 0.0060 not significant not significant not significant 0.0978 not significant not significant 0.0102 not significant not significant 0.0222 0.014 not significant
Starch and sucrose metabolism not significant 0.0063 not significant 0.0773 not significant not significant 0.0252 not significant 0.0194 not significant 0.0233 not significant 0.0103 not significant
Amino sugar and nucleotide sugar metabolism not significant 0.0067 not significant not significant not significant not significant not significant not significant 0.0178 not significant 0.0589 0.051 0.0101 not significant
Arginine and proline metabolism not significant 0.0085 not significant not significant not significant not significant not significant not significant 0.0341 not significant 0.0801 0.0492 0.0044 not significant
Glycerophospholipid metabolism not significant 0.0099 not significant not significant not significant not significant not significant not significant not significant 0.0134 0.0209 not significant 0.0122 not significant
Purine metabolism not significant 0.0120 not significant 0.0883 not significant not significant 0.0686 not significant 0.0724 0.0784 0.0208 not significant 0.0138 not significant
Glycine, serine and threonine metabolism not significant 0.0120 not significant not significant not significant not significant not significant not significant 0.0327 not significant 0.0267 not significant 0.0085 not significant
Arginine biosynthesis not significant 0.0145 not significant not significant not significant not significant 0.0878 not significant 0.0533 not significant 0.031 not significant 0.0082 not significant
Nicotinate and nicotinamide metabolism not significant 0.0145 not significant 0.0712 not significant not significant 0.0562 not significant 0.0747 0.0942 0.0603 not significant 0.0282 not significant
unnamed not significant 0.0151 not significant not significant not significant not significant not significant not significant 0.0669 not significant not significant 0.0644 0.008 not significant
Biotin metabolism not significant 0.0186 not significant not significant not significant not significant not significant not significant 0.0453 not significant 0.0352 not significant 0.02 not significant
Lipoic acid metabolism not significant 0.0224 not significant not significant not significant not significant not significant not significant 0.0259 not significant not significant 0.0558 0.028 not significant
Porphyrin and chlorophyll metabolism not significant 0.0253 not significant not significant not significant not significant 0.0703 not significant 0.086 not significant 0.044 not significant 0.0067 not significant
Nitrogen metabolism not significant 0.0253 not significant 0.0752 not significant not significant 0.0651 not significant not significant not significant not significant not significant 0.0082 not significant
Biofilm formation - Vibrio cholerae not significant 0.0264 not significant not significant not significant not significant not significant not significant not significant 0.0957 not significant 0.0296 0.0192 not significant
Lysine biosynthesis not significant 0.0287 not significant 0.0926 not significant not significant not significant not significant 0.095 not significant 0.0243 not significant 0.0408 not significant
RNA degradation not significant 0.0293 not significant not significant not significant not significant not significant not significant 0.0674 not significant 0.0252 not significant 0.0421 not significant
Other glycan degradation not significant 0.0301 not significant not significant not significant not significant not significant not significant 0.0097 not significant not significant 0.0297 0.04 not significant
Glutathione metabolism not significant 0.0308 not significant 0.042 not significant not significant not significant not significant 0.0972 not significant not significant not significant 0.0147 not significant
DNA replication not significant 0.0319 not significant not significant not significant not significant not significant not significant 0.0928 not significant 0.004 not significant 0.0507 not significant
Terpenoid backbone biosynthesis not significant 0.0320 not significant 0.0815 not significant not significant not significant not significant 0.0677 not significant 0.0741 not significant 0.0361 not significant
Protein export not significant 0.0352 not significant not significant not significant not significant not significant not significant 0.083 not significant 0.0095 not significant 0.052 not significant
Selenocompound metabolism not significant 0.0353 not significant not significant not significant not significant not significant not significant 0.0851 not significant 0.0665 not significant 0.0626 not significant
Peptidoglycan biosynthesis not significant 0.0364 not significant 0.0829 not significant not significant 0.0712 not significant not significant not significant 0.006 not significant 0.0633 not significant
Nucleotide excision repair not significant 0.0384 not significant not significant not significant not significant not significant not significant 0.0957 not significant 0.0117 not significant 0.0741 not significant
Thiamine metabolism not significant 0.0391 not significant 0.0783 not significant not significant 0.0521 not significant 0.0954 not significant 0.0595 not significant 0.0419 not significant
Galactose metabolism not significant 0.0395 not significant not significant not significant not significant not significant not significant 0.0895 not significant not significant not significant 0.0916 not significant
Phenylalanine metabolism not significant 0.0401 not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.079 0.0041 not significant
Aminoacyl-tRNA biosynthesis not significant 0.0424 not significant not significant not significant not significant not significant not significant not significant not significant 0.0059 not significant 0.0689 not significant
Pantothenate and CoA biosynthesis not significant 0.0426 not significant not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0289 not significant
Valine, leucine and isoleucine degradation not significant 0.0447 not significant not significant not significant not significant not significant not significant not significant not significant not significant 0.0936 0.0481 not significant
Cell cycle - Caulobacter not significant 0.0451 not significant not significant not significant not significant not significant not significant not significant not significant 0.0084 not significant not significant not significant
Glycerolipid metabolism not significant 0.0484 not significant not significant not significant not significant 0.0519 not significant not significant not significant 0.0245 not significant 0.0295 not significant
Figure for publication
Code
# prepare the base dataset for the plot
dat_PWsignif <- dat_PWresults %>% 
  filter((freshInt_pvalue < 0.05) | (pastInt_pvalue < 0.05))
dat_plot <- data.frame(pathway      = rep(dat_PWsignif$pathway, times = 2),
                       baseline_medianTPMAbd = rep(dat_PWsignif$baseline_medianTPMAbd, times = 2),
                       model        = rep(dat_PWsignif$model, times = 2),
                       intervention = rep(c("Fresh intervention", "Pasteurized intervention"), each = nrow(dat_PWsignif)),
                       effect       = c(dat_PWsignif$freshInt_effect, dat_PWsignif$pastInt_effect),
                       pvalue       = c(dat_PWsignif$freshInt_pvalue, dat_PWsignif$pastInt_pvalue),
                       pastEffect_forSorting = rep(dat_PWsignif$pastInt_effect, times = 2)) %>% 
  dplyr::left_join(dat_PW %>% select(taxonomy_level3, taxonomy_level1) %>% distinct(taxonomy_level3, .keep_all = TRUE), by = c("pathway" = "taxonomy_level3")) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & (intervention == "Fresh intervention") ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model == "Gaussian") & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model == "Gaussian") & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model == "Gaussian") & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model == "Gaussian") & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model == "Gaussian") & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model == "Gaussian") & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model == "log-Gaussian") & (effect >= 0)                                ~ "+ <0.01%",
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model == "log-Gaussian") & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model == "log-Gaussian") & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$pathway)), function(pw) {
  dat_pw <- dat_plot %>% filter(pathway == pw)
  dat_pw %>% 
    dplyr::bind_rows(data.frame(taxonomy_level1 = dat_pw$taxonomy_level1[1],
                                pathway       = pw,
                                intervention  = "Baseline median TPM abd.",
                                pvalue_signif = "",
                                effect_label  = case_when(dat_pw$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                          dat_pw$baseline_medianTPMAbd < .001  ~ paste0(round(dat_pw$baseline_medianTPMAbd, 4), " TPM"),
                                                          dat_pw$baseline_medianTPMAbd < .01   ~ paste0(round(dat_pw$baseline_medianTPMAbd, 3), " TPM"),
                                                          dat_pw$baseline_medianTPMAbd < .1    ~ paste0(round(dat_pw$baseline_medianTPMAbd, 2), " TPM"),
                                                          dat_pw$baseline_medianTPMAbd < 1     ~ paste0(round(dat_pw$baseline_medianTPMAbd, 1), " TPM"),
                                                          TRUE ~ paste0(round(dat_pw$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(pathway      = factor(pathway, levels = rev(unique(pathway))),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.", "Fresh intervention", "Pasteurized intervention")),
         taxonomy_level1 = case_when(taxonomy_level1 == "Cellular Processes"             ~ "CP",
                                     taxonomy_level1 == "Genetic Information Processing" ~ "GIP",
                                     TRUE                                                ~ taxonomy_level1),
         taxonomy_level1 = factor(taxonomy_level1, levels = c("CP", "GIP", "Metabolism")))


# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = pathway)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(taxonomy_level1), scales = "free", space = "free") +
  ggtitle("Intervention effects on pathways") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS21_pathwayScreening_pvalue.png", width = 10, height = 10)

Results for interaction effects

Overview of the numbers of significant genes
Code
data.frame("what"  = c("No. of pathways that didn't have low abundance",
                       "No. of significant pathways (fresh intervention - age low)",
                       "No. of significant pathways (fresh intervention - age high)",
                       "No. of significant pathways (past. intervention - age low)",
                       "No. of significant pathways (past. intervention - age high)",
                       "No. of significant pathways (fresh intervention - gender male)",
                       "No. of significant pathways (fresh intervention - gender female)",
                       "No. of significant pathways (past. intervention - gender male)",
                       "No. of significant pathways (past. intervention - gender female)",
                       "No. of significant pathways (fresh intervention - BMI low)",
                       "No. of significant pathways (fresh intervention - BMI high)",
                       "No. of significant pathways (past. intervention - BMI low)",
                       "No. of significant pathways (past. intervention - BMI high)"),
           "value" = c(nrow(dat_PWresults),
                       sum(dat_PWresults$freshInt_ageLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_ageHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_ageLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_ageHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_sexM_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_sexW_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_sexM_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_sexW_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_bmiLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_bmiHigh_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_bmiLow_pvalue_correct < 0.1, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_bmiHigh_pvalue_correct < 0.1, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of pathways that didn't have low abundance 100
No. of significant pathways (fresh intervention - age low) 0
No. of significant pathways (fresh intervention - age high) 0
No. of significant pathways (past. intervention - age low) 0
No. of significant pathways (past. intervention - age high) 0
No. of significant pathways (fresh intervention - gender male) 0
No. of significant pathways (fresh intervention - gender female) 0
No. of significant pathways (past. intervention - gender male) 0
No. of significant pathways (past. intervention - gender female) 0
No. of significant pathways (fresh intervention - BMI low) 0
No. of significant pathways (fresh intervention - BMI high) 0
No. of significant pathways (past. intervention - BMI low) 0
No. of significant pathways (past. intervention - BMI high) 0
… alternatively looking at the uncorrected p-values
Code
data.frame("what"  = c("No. of pathways that didn't have low abundance",
                       "No. of significant pathways (fresh intervention - age low)",
                       "No. of significant pathways (fresh intervention - age high)",
                       "No. of significant pathways (past. intervention - age low)",
                       "No. of significant pathways (past. intervention - age high)",
                       "No. of significant pathways (fresh intervention - gender male)",
                       "No. of significant pathways (fresh intervention - gender female)",
                       "No. of significant pathways (past. intervention - gender male)",
                       "No. of significant pathways (past. intervention - gender female)",
                       "No. of significant pathways (fresh intervention - BMI low)",
                       "No. of significant pathways (fresh intervention - BMI high)",
                       "No. of significant pathways (past. intervention - BMI low)",
                       "No. of significant pathways (past. intervention - BMI high)"),
           "value" = c(nrow(dat_PWresults),
                       sum(dat_PWresults$freshInt_ageLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_ageHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_ageLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_ageHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_sexM_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_sexW_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_sexM_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_sexW_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_bmiLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$freshInt_bmiHigh_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_bmiLow_pvalue < 0.05, na.rm = TRUE),
                       sum(dat_PWresults$pastInt_bmiHigh_pvalue < 0.05, na.rm = TRUE))) %>% 
  kable() %>% 
  kable_styling()
what value
No. of pathways that didn't have low abundance 100
No. of significant pathways (fresh intervention - age low) 0
No. of significant pathways (fresh intervention - age high) 3
No. of significant pathways (past. intervention - age low) 17
No. of significant pathways (past. intervention - age high) 2
No. of significant pathways (fresh intervention - gender male) 0
No. of significant pathways (fresh intervention - gender female) 0
No. of significant pathways (past. intervention - gender male) 28
No. of significant pathways (past. intervention - gender female) 7
No. of significant pathways (fresh intervention - BMI low) 6
No. of significant pathways (fresh intervention - BMI high) 1
No. of significant pathways (past. intervention - BMI low) 36
No. of significant pathways (past. intervention - BMI high) 1

Age

Figure for publication
Code
# prepare the base dataset for the plot
dat_signif <- dat_PWresults %>% 
  filter((freshInt_ageLow_pvalue < 0.05) | (freshInt_ageHigh_pvalue < 0.05) |
           (pastInt_ageLow_pvalue < 0.05) | (pastInt_ageHigh_pvalue < 0.05))
dat_plot <- data.frame(pathway       = rep(dat_signif$pathway,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(age low)", "Fresh intervention\n(age high)",
                                             "Pasteurized intervention\n(age low)", "Pasteurized intervention\n(age high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_ageLow_effect, dat_signif$freshInt_ageHigh_effect,
                                         dat_signif$pastInt_ageLow_effect,  dat_signif$pastInt_ageHigh_effect),
                       pvalue        = c(dat_signif$freshInt_ageLow_pvalue, dat_signif$freshInt_ageHigh_pvalue,
                                         dat_signif$pastInt_ageLow_pvalue,  dat_signif$pastInt_ageHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_ageLow_effect, times = 4)) %>% 
  dplyr::left_join(dat_PW %>% select(taxonomy_level3, taxonomy_level1) %>% distinct(taxonomy_level3, .keep_all = TRUE), by = c("pathway" = "taxonomy_level3")) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$pathway)), function(pw) {
  dat_pw <- dat_plot %>% filter(pathway == pw)
  dat_pw %>% 
    dplyr::bind_rows(data.frame(taxonomy_level1 = dat_pw$taxonomy_level1[1],
                                pathway         = pw,
                                intervention    = "Baseline median TPM abd.",
                                pvalue_signif   = "",
                                effect_label    = case_when(dat_pw$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                            dat_pw$baseline_medianTPMAbd < .001  ~ paste0(round(dat_pw$baseline_medianTPMAbd, 4), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < .01   ~ paste0(round(dat_pw$baseline_medianTPMAbd, 3), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < .1    ~ paste0(round(dat_pw$baseline_medianTPMAbd, 2), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < 1     ~ paste0(round(dat_pw$baseline_medianTPMAbd, 1), " TPM"),
                                                            TRUE ~ paste0(round(dat_pw$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(pathway      = factor(pathway, levels = rev(unique(pathway))),
         intervention = gsub(intervention, pattern = "age low",  replacement = "age 21-49"),
         intervention = gsub(intervention, pattern = "age high", replacement = "age 50-69"),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.",
                                                        "Fresh intervention\n(age 21-49)",
                                                        "Fresh intervention\n(age 50-69)",
                                                        "Pasteurized intervention\n(age 21-49)",
                                                        "Pasteurized intervention\n(age 50-69)")),
         taxonomy_level1 = case_when(taxonomy_level1 == "Cellular Processes"             ~ "CP",
                                     taxonomy_level1 == "Genetic Information Processing" ~ "GIP",
                                     TRUE                                                ~ taxonomy_level1),
         taxonomy_level1 = factor(taxonomy_level1, levels = c("CP", "GIP", "Metabolism")))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = pathway)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(taxonomy_level1), scales = "free", space = "free") +
  ggtitle("Intervention effects on pathways") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS22_pathwayScreening_ageStrat_pvalues.png", width = 13, height = 8)

Sex

Figure for publication
Code
# prepare the base dataset for the plot
dat_signif <- dat_PWresults %>% 
  filter((freshInt_sexM_pvalue < 0.05) | (freshInt_sexW_pvalue < 0.05) |
           (pastInt_sexM_pvalue < 0.05) | (pastInt_sexW_pvalue < 0.05))
dat_plot <- data.frame(pathway       = rep(dat_signif$pathway,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(gender male)", "Fresh intervention\n(gender female)",
                                             "Pasteurized intervention\n(gender male)", "Pasteurized intervention\n(gender female)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_sexM_effect, dat_signif$freshInt_sexW_effect,
                                         dat_signif$pastInt_sexM_effect,  dat_signif$pastInt_sexW_effect),
                       pvalue        = c(dat_signif$freshInt_sexM_pvalue, dat_signif$freshInt_sexW_pvalue,
                                         dat_signif$pastInt_sexM_pvalue,  dat_signif$pastInt_sexW_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_sexM_effect, times = 4)) %>% 
  dplyr::left_join(dat_PW %>% select(taxonomy_level3, taxonomy_level1) %>% distinct(taxonomy_level3, .keep_all = TRUE), by = c("pathway" = "taxonomy_level3")) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$pathway)), function(pw) {
  dat_pw <- dat_plot %>% filter(pathway == pw)
  dat_pw %>% 
    dplyr::bind_rows(data.frame(taxonomy_level1 = dat_pw$taxonomy_level1[1],
                                pathway         = pw,
                                intervention    = "Baseline median TPM abd.",
                                pvalue_signif   = "",
                                effect_label    = case_when(dat_pw$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                            dat_pw$baseline_medianTPMAbd < .001  ~ paste0(round(dat_pw$baseline_medianTPMAbd, 4), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < .01   ~ paste0(round(dat_pw$baseline_medianTPMAbd, 3), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < .1    ~ paste0(round(dat_pw$baseline_medianTPMAbd, 2), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < 1     ~ paste0(round(dat_pw$baseline_medianTPMAbd, 1), " TPM"),
                                                            TRUE ~ paste0(round(dat_pw$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(pathway      = factor(pathway, levels = rev(unique(pathway))),
         intervention = gsub(intervention, pattern = "gender ", replacement = ""),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.",
                                                        "Fresh intervention\n(male)",
                                                        "Fresh intervention\n(female)",
                                                        "Pasteurized intervention\n(male)",
                                                        "Pasteurized intervention\n(female)")),
         taxonomy_level1 = case_when(taxonomy_level1 == "Cellular Processes" ~ "CP",
                                     TRUE                                    ~ taxonomy_level1),
         taxonomy_level1 = factor(taxonomy_level1, levels = c("CP", "Genetic Information Processing", "Metabolism")))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = pathway)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(taxonomy_level1), scales = "free", space = "free") +
  ggtitle("Intervention effects on pathways") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS23_pathwayScreening_sexStrat_pvalues.png", width = 13, height = 10)

BMI

Figure for publication
Code
# prepare the base dataset for the plot
dat_signif <- dat_PWresults %>% 
  filter((freshInt_bmiLow_pvalue < 0.05) | (freshInt_bmiHigh_pvalue < 0.05) |
           (pastInt_bmiLow_pvalue < 0.05) | (pastInt_bmiHigh_pvalue < 0.05))
dat_plot <- data.frame(pathway       = rep(dat_signif$pathway,  times = 4),
                       baseline_medianTPMAbd = rep(dat_signif$baseline_medianTPMAbd, times = 4),
                       model         = rep(dat_signif$model, times = 4),
                       intervention  = rep(c("Fresh intervention\n(BMI low)", "Fresh intervention\n(BMI high)",
                                             "Pasteurized intervention\n(BMI low)", "Pasteurized intervention\n(BMI high)"), each = nrow(dat_signif)),
                       effect        = c(dat_signif$freshInt_bmiLow_effect, dat_signif$freshInt_bmiHigh_effect,
                                         dat_signif$pastInt_bmiLow_effect,  dat_signif$pastInt_bmiHigh_effect),
                       pvalue        = c(dat_signif$freshInt_bmiLow_pvalue, dat_signif$freshInt_bmiHigh_pvalue,
                                         dat_signif$pastInt_bmiLow_pvalue,  dat_signif$pastInt_bmiHigh_pvalue),
                       pastEffect_forSorting = rep(dat_signif$pastInt_bmiLow_effect, times = 4)) %>% 
  dplyr::left_join(dat_PW %>% select(taxonomy_level3, taxonomy_level1) %>% distinct(taxonomy_level3, .keep_all = TRUE), by = c("pathway" = "taxonomy_level3")) %>% 
  arrange(desc(pastEffect_forSorting)) %>% 
  mutate(pvalue_signif = case_when((pvalue < 0.05) & grepl("Fresh", intervention) ~ "significant (fresh intervention)",
                                   pvalue < 0.05 ~ "significant (pasteurized intervention)",
                                   TRUE          ~ "not significant"),
         effect_label  = case_when((model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 1)        ~ paste0("+", round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.1)      ~ paste0("+", round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.01)     ~ paste0("+", round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.001)    ~ paste0("+", round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0.0001)   ~ paste0("+", round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect >= 0)        ~ "+ <0.0001 TPM",
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -1)       ~ paste0(round(effect, 0), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.1)     ~ paste0(round(effect, 1), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.01)    ~ paste0(round(effect, 2), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.001)   ~ paste0(round(effect, 3), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect <= -0.0001)  ~ paste0(round(effect, 4), " TPM"),
                                   (model %in% c("Gaussian", "ZI Gaussian")) & (effect < 0)                   ~ "- >-0.0001 TPM",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 1)             ~ paste0("+", round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.1)           ~ paste0("+", round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) >= 0.01)          ~ paste0("+", round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect >= 0)                                ~ "+ <0.01%",
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -1)            ~ paste0(round((100 * (exp(effect) - 1)), 0), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.1)          ~ paste0(round((100 * (exp(effect) - 1)), 1), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & ((100 * (exp(effect) - 1)) <= -0.01)         ~ paste0(round((100 * (exp(effect) - 1)), 2), "%"),
                                   (model %in% c("log-Gaussian", "ZI log-Gaussian")) & (effect < 0)                                 ~ "- >-0.01%"))

# artificially add the baseline rel. abd. as an 'intervention' and 'effect_label' to be able to nicely plot it
dat_plot_list <- lapply(as.character(unique(dat_plot$pathway)), function(pw) {
  dat_pw <- dat_plot %>% filter(pathway == pw)
  dat_pw %>% 
    dplyr::bind_rows(data.frame(taxonomy_level1 = dat_pw$taxonomy_level1[1],
                                pathway         = pw,
                                intervention    = "Baseline median TPM abd.",
                                pvalue_signif   = "",
                                effect_label    = case_when(dat_pw$baseline_medianTPMAbd < .0001 ~ "<0.0001 TPM",
                                                            dat_pw$baseline_medianTPMAbd < .001  ~ paste0(round(dat_pw$baseline_medianTPMAbd, 4), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < .01   ~ paste0(round(dat_pw$baseline_medianTPMAbd, 3), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < .1    ~ paste0(round(dat_pw$baseline_medianTPMAbd, 2), " TPM"),
                                                            dat_pw$baseline_medianTPMAbd < 1     ~ paste0(round(dat_pw$baseline_medianTPMAbd, 1), " TPM"),
                                                            TRUE ~ paste0(round(dat_pw$baseline_medianTPMAbd, 0), " TPM"))))
})
dat_plot <- dat_plot_list %>% dplyr::bind_rows() %>% 
  mutate(pathway      = factor(pathway, levels = rev(unique(pathway))),
         intervention = gsub(intervention, pattern = "low",  replacement = "[18, 25)"),
         intervention = gsub(intervention, pattern = "high", replacement = "[25, 31]"),
         intervention = factor(intervention, levels = c("Baseline median TPM abd.",
                                                        "Fresh intervention\n(BMI [18, 25))",
                                                        "Fresh intervention\n(BMI [25, 31])",
                                                        "Pasteurized intervention\n(BMI [18, 25))",
                                                        "Pasteurized intervention\n(BMI [25, 31])")),
         taxonomy_level1 = case_when(taxonomy_level1 == "Cellular Processes"             ~ "CP",
                                     taxonomy_level1 == "Genetic Information Processing" ~ "GIP",
                                     TRUE                                                ~ taxonomy_level1),
         taxonomy_level1 = factor(taxonomy_level1, levels = c("CP", "GIP", "Metabolism")))

# plot
dat_plot %>% 
  ggplot(aes(x = intervention, y = pathway)) +
  geom_tile(aes(fill = pvalue_signif)) +
  geom_text(aes(label = effect_label)) +
  scale_fill_manual("uncorrected p-value", values = c("not significant" = "gray95",
                                                      "significant (fresh intervention)" = "#1F78B4",
                                                      "significant (pasteurized intervention)" = "#FF7F00"),
                    na.value = "white") +
  facet_grid(rows = vars(taxonomy_level1), scales = "free", space = "free") +
  ggtitle("Intervention effects on pathways") +
  theme(axis.title = element_blank(),
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "gray97", color = "gray97"))

Code
# ggsave("FigureS24_pathwayScreening_bmiStrat_pvalues.png", width = 13, height = 10)

Joint plot of condensed information

Focus on the four biggest phylum groups: Firmicutes, Bacteroidetes, Actinobacteria and Proteobacteria

Code
# data preparation
plot_dat_wide <- dat_PWresults %>% 
  dplyr::left_join(dat_PW %>% select(taxonomy_level1, taxonomy_level3) %>% distinct(taxonomy_level3, .keep_all = T),
                   by = c("pathway" = "taxonomy_level3")) %>% 
  group_by(taxonomy_level1) %>% 
  dplyr::summarize(n_pathways                   = n(),
                   n_freshSignifEffects_overall = sum(freshInt_pvalue         < 0.05),
                   n_freshSignifEffects_ageLow  = sum(freshInt_ageLow_pvalue  < 0.05),
                   n_freshSignifEffects_ageHigh = sum(freshInt_ageHigh_pvalue < 0.05),
                   n_freshSignifEffects_sexM    = sum(freshInt_sexM_pvalue    < 0.05),
                   n_freshSignifEffects_sexW    = sum(freshInt_sexW_pvalue    < 0.05),
                   n_freshSignifEffects_bmiLow  = sum(freshInt_bmiLow_pvalue  < 0.05),
                   n_freshSignifEffects_bmiHigh = sum(freshInt_bmiHigh_pvalue < 0.05),
                   n_pastSignifEffects_overall  = sum(pastInt_pvalue          < 0.05),
                   n_pastSignifEffects_ageLow   = sum(pastInt_ageLow_pvalue   < 0.05),
                   n_pastSignifEffects_ageHigh  = sum(pastInt_ageHigh_pvalue  < 0.05),
                   n_pastSignifEffects_sexM     = sum(pastInt_sexM_pvalue     < 0.05),
                   n_pastSignifEffects_sexW     = sum(pastInt_sexW_pvalue     < 0.05),
                   n_pastSignifEffects_bmiLow   = sum(pastInt_bmiLow_pvalue   < 0.05),
                   n_pastSignifEffects_bmiHigh  = sum(pastInt_bmiHigh_pvalue  < 0.05))


# reshape dataset to long format
plot_dat_long <- data.frame(taxonomy_level1      = rep(plot_dat_wide$taxonomy_level1, times = sum(grepl("freshSignifEffects", colnames(plot_dat_wide)))),
                            n_pathways           = rep(plot_dat_wide$n_pathways, times = sum(grepl("freshSignifEffects", colnames(plot_dat_wide)))),
                            effect_group         = rep(c("overall", "ageInt", "ageInt", "sexInt", "sexInt", "bmiInt", "bmiInt"),
                                                       each = nrow(plot_dat_wide)),
                            effect_type          = rep(c("overall", "ageLow", "ageHigh", "sexM", "sexW", "bmiLow", "bmiHigh"),
                                                       each = nrow(plot_dat_wide)),
                            n_freshSignifEffects = c(plot_dat_wide$n_freshSignifEffects_overall,
                                                     plot_dat_wide$n_freshSignifEffects_ageLow,
                                                     plot_dat_wide$n_freshSignifEffects_ageHigh,
                                                     plot_dat_wide$n_freshSignifEffects_sexM,
                                                     plot_dat_wide$n_freshSignifEffects_sexW,
                                                     plot_dat_wide$n_freshSignifEffects_bmiLow,
                                                     plot_dat_wide$n_freshSignifEffects_bmiHigh),
                            n_pastSignifEffects  = c(plot_dat_wide$n_pastSignifEffects_overall,
                                                     plot_dat_wide$n_pastSignifEffects_ageLow,
                                                     plot_dat_wide$n_pastSignifEffects_ageHigh,
                                                     plot_dat_wide$n_pastSignifEffects_sexM,
                                                     plot_dat_wide$n_pastSignifEffects_sexW,
                                                     plot_dat_wide$n_pastSignifEffects_bmiLow,
                                                     plot_dat_wide$n_pastSignifEffects_bmiHigh)) %>% 
  mutate(taxLevel1_label = paste0(taxonomy_level1, "\n", n_pathways, " pathways"),
         effect_group    = factor(effect_group, levels = c("overall", "ageInt", "sexInt", "bmiInt")),
         effect_type     = factor(effect_type, levels = rev(c("overall", "ageLow", "ageHigh", "sexM", "sexW", "bmiLow", "bmiHigh"))))

# make the dataset even longer
plot_dat_longer <- data.frame(taxonomy_level1 = rep(plot_dat_long$taxonomy_level1, times = 2),
                              n_pathways      = rep(plot_dat_long$n_pathways,      times = 2),
                              taxLevel1_label = rep(plot_dat_long$taxLevel1_label, times = 2),
                              effect_group    = rep(plot_dat_long$effect_group,    times = 2),
                              effect_type     = rep(plot_dat_long$effect_type,     times = 2),
                              intervention    = rep(c("Fresh", "Past."), each = nrow(plot_dat_long)),
                              parameter       = "n_signifEffects",
                              value           = c(plot_dat_long$n_freshSignifEffects,
                                                  plot_dat_long$n_pastSignifEffects)) %>% 
  group_by(taxonomy_level1, parameter, effect_group, intervention) %>% 
  mutate(alpha_value = case_when(effect_group == "overall"  ~ 0.8,
                                 value == min(value)        ~ 0,
                                 TRUE                       ~ 1)) %>% 
  ungroup() %>% 
  mutate(fill_tiles = case_when(effect_type == "overall" ~ "no",
                                TRUE ~ taxonomy_level1),
         alpha_tiles = case_when(effect_type %in% c("ageLow", "sexM", "bmiLow") ~ 0.05,
                                 TRUE ~ 0.15))
plot_dat_longer <- plot_dat_longer %>%
  mutate(taxLevel1_label = factor(taxLevel1_label, levels = plot_dat_longer %>% arrange(desc(n_pathways)) %>% 
                                    pull(taxLevel1_label) %>% unique()),
         effect_type     = case_when(effect_type == "ageLow"  ~ "age 21-49",
                                     effect_type == "ageHigh" ~ "age 50-69",
                                     effect_type == "sexM"    ~ "male",
                                     effect_type == "sexW"    ~ "female",
                                     effect_type == "bmiLow"  ~ "BMI [18, 25)",
                                     effect_type == "bmiHigh" ~ "BMI [25, 31]",
                                     TRUE                     ~ effect_type),
         effect_type     = factor(effect_type, levels = c("BMI [25, 31]", "BMI [18, 25)",
                                                          "female", "male",
                                                          "age 50-69", "age 21-49", "overall")))


# create a color vector for color coding the interaction terms
col_vector <- RColorBrewer::brewer.pal(3, name = "Dark2")

# plot
ggplot(plot_dat_longer, aes(x = intervention, y = effect_type)) +
  geom_tile(aes(fill = fill_tiles, alpha = alpha_tiles)) +
  geom_text(aes(label = value, alpha = alpha_value)) +
  facet_grid(effect_group ~ taxLevel1_label, scales = "free") +
  scale_fill_manual(values = c("no"                             = "white",
                               "Metabolism"                     = col_vector[1],
                               "Genetic Information Processing" = col_vector[2],
                               "Cellular Processes"             = col_vector[3])) +
  theme(axis.title      = element_blank(),
        panel.grid      = element_blank(),
        legend.position = "none",
        strip.background.x = element_rect(fill = "gray97", color = "gray97"),
        strip.text.y    = element_blank())

Code
# ggsave("FigureS20_pathwayScreening_stratificationTable.png", width = 7, height = 4)